From e6110b9c9e53d197d912c707042058e3c089d756 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Tue, 10 Dec 2024 21:38:12 +0000
Subject: [PATCH 01/12] thinking about KDE implementation

---
 spatial_compare/spatial_compare.py |   1 -
 spatial_compare/utils.py           | 170 +++++++++++++++++++++--------
 2 files changed, 126 insertions(+), 45 deletions(-)

diff --git a/spatial_compare/spatial_compare.py b/spatial_compare/spatial_compare.py
index 30e2f31..dc11a8d 100644
--- a/spatial_compare/spatial_compare.py
+++ b/spatial_compare/spatial_compare.py
@@ -14,7 +14,6 @@
 
 from spatial_compare.utils import grouped_obs_mean
 
-
 DEFAULT_DATA_NAMES = ["Data 0", "Data 1"]
 
 
diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py
index 1e4ca02..f6828c8 100644
--- a/spatial_compare/utils.py
+++ b/spatial_compare/utils.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pandas as pd
 import seaborn as sns
-
+import scipy as sp
 
 def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None):
     """
@@ -47,17 +47,106 @@ def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None):
 
     return out
 
+def spatial_detection_score_kde(
+    query: pd.DataFrame,
+    grid_out: int = 100j
+):
+    cell_x = query["x_centroid"]
+    cell_y = query["y_centroid"]
+    
+    xmin, xmax = cell_x.min(), cell_x.max()
+    ymin, ymax = cell_y.min(), cell_y.max()
+    extent = [xmin, xmax, ymin, ymax]
+    dmax = (xmax-xmin)**2 + (ymax-ymin)**2
+    
+    X, Y = np.mgrid[xmin:xmax:grid_out, ymin:ymax:grid_out]
+    positions = np.vstack([X.ravel(), Y.ravel()])
+    
+    cell_coords = np.vstack(
+        [
+            cell_x.values,
+            cell_y.values
+        ]
+    );
+
+    scores = [];
+    for column in ["detection_relative_z_score", "detection_difference", "log_10_detection_ratio"]:
+        weights = np.abs(query[column].values)
+        weights[np.isnan(weights)] = 0
+        #weights = weights + abs(weights.min())
+
+        kde = sp.stats.gaussian_kde(
+            cell_coords,
+            weights=weights
+        )(positions).T
+        #Z = np.reshape(kde, X.shape).T
+
+        output = np.zeros(kde.shape)
+        for i, z in enumerate(kde):
+            coord = positions[:, i]
+            output[i] = z * np.sum(np.sqrt(dmax - np.square(cell_coords.T - coord).sum(axis=1)) * weights)
+        Z = np.reshape(output, X.shape).T
+        
+        scores.append(Z)
+
+    return (
+        extent,
+        scores[0],
+        scores[1],
+        scores[2],
+        np.ones(Z.shape)
+    )
+
+def spatial_detection_score_binned(
+    query: pd.DataFrame,
+    n_bins: int = 50,
+):
+    query["xy_bucket"] = list(
+        zip(
+            pd.cut(query.x_centroid, n_bins, labels=list(range(n_bins))),
+            pd.cut(query.y_centroid, n_bins, labels=list(range(n_bins))),
+        )
+    )
+
+    binx = query.groupby("xy_bucket").x_centroid.mean()
+    biny = query.groupby("xy_bucket").y_centroid.mean()
+
+    z_score = query.groupby("xy_bucket").detection_relative_z_score.mean()
+    difference = query.groupby("xy_bucket").detection_difference.mean()
+    log_ratio = query.groupby("xy_bucket").log_10_detection_ratio.mean()
+    n_cells = query.groupby("xy_bucket").x_centroid.count()
+
+    bin_image_z_score = np.zeros([n_bins, n_bins])
+    bin_image_difference = np.zeros([n_bins, n_bins])
+    bin_image_ratio = np.zeros([n_bins, n_bins])
+    bin_image_counts = np.zeros([n_bins, n_bins])
+
+    extent = [np.min(binx), np.max(binx), np.min(biny), np.max(biny)]
+    for coord in binx.index:
+        bin_image_z_score[coord[1], coord[0]] = z_score[coord]
+        bin_image_difference[coord[1], coord[0]] = difference[coord]
+        bin_image_ratio[coord[1], coord[0]] = log_ratio[coord]
+        bin_image_counts[coord[1], coord[0]] = n_cells[coord]
+
+    return (
+        extent,
+        bin_image_z_score,
+        bin_image_difference,
+        bin_image_ratio,
+        bin_image_counts
+    )
 
 def spatial_detection_scores(
     reference: pd.DataFrame,
     query: pd.DataFrame,
     plot_stuff=True,
     query_name: str = "query data",
-    comparison_column="transcript_counts",
-    category="supercluster_name",
-    n_bins=50,
-    in_place=True,
-    non_spatial=False,
+    comparison_column: str = "transcript_counts",
+    category: str = "supercluster_name",
+    n_bins: int = 50,
+    in_place: bool = True,
+    non_spatial: bool = False,
+    use_kde: bool = False,
 ):
     """
     Calculate and plot spatial detection scores for query data compared to reference data.
@@ -71,22 +160,22 @@ def spatial_detection_scores(
         n_bins (int, optional): The number of bins for spatial grouping. Defaults to 50.
         in_place (bool, optional): Whether to modify the query data in place. Defaults to True.
         non_spatial (bool, optional): Whether to compare to an ungrouped mean/std. Defaults to False.
+        use_kde (bool, optional): Whether to use kernel-density estimates or not. If not, bins into `n_bins` instead. Defaults to False.
 
     Returns:
         dict: A dictionary containing the bin image, extent, query data, and reference data (if in_place is False).
     """
-    # code goes here
-
     if category not in reference.columns or category not in query.columns:
         raise ValueError("category " + category + " not in reference and query inputs")
 
     shared_category_values = list(
         set(reference[category].unique()) & set(query[category].unique())
     )
-    if (
+    if in_place and (
         len(shared_category_values) < query[category].unique().shape[0]
         or len(shared_category_values) < reference[category].unique().shape[0]
     ):
+        print("Query and reference datasets had different shapes. Objects will not be modified in place")
         in_place = False
 
     if in_place:
@@ -107,47 +196,41 @@ def spatial_detection_scores(
     s2["detection_relative_z_score"] = 0.0
     s2["detection_difference"] = 0.0
     s2["detection_ratio"] = 0.0
+    s2["log_10_detection_ratio"] = 0.0
 
     for c, gb in s2.groupby(category, observed=True):
         if c not in shared_category_values:
             continue
 
-        s2.loc[s2[category] == c, ["detection_relative_z_score"]] = (
-            (s2.loc[s2[category] == c, [comparison_column]] - means[c]) / stds[c]
+        indices = s2[category] == c
+        
+        s2.loc[indices, ["detection_relative_z_score"]] = (
+            (s2.loc[indices, [comparison_column]] - means[c]) / stds[c]
         ).values
-        s2.loc[s2[category] == c, ["detection_difference"]] = (
-            s2.loc[s2[category] == c, [comparison_column]] - means[c]
+        s2.loc[indices, ["detection_difference"]] = (
+            s2.loc[indices, [comparison_column]] - means[c]
         ).values
-        s2.loc[s2[category] == c, ["log_10_detection_ratio"]] = np.log10(
-            (s2.loc[s2[category] == c, [comparison_column]] / means[c]).values
-        )
-
-    s2["xy_bucket"] = list(
-        zip(
-            pd.cut(s2.x_centroid, n_bins, labels=list(range(n_bins))),
-            pd.cut(s2.y_centroid, n_bins, labels=list(range(n_bins))),
+        s2.loc[indices, ["detection_ratio"]] = (s2.loc[indices, [comparison_column]] / means[c]).values
+        s2.loc[indices, ["log_10_detection_ratio"]] = np.log10(
+            s2.loc[indices, ["detection_ratio"]].values
         )
-    )
-
-    binx = s2.groupby("xy_bucket").x_centroid.mean()
-    biny = s2.groupby("xy_bucket").y_centroid.mean()
 
-    z_score = s2.groupby("xy_bucket").detection_relative_z_score.mean()
-    difference = s2.groupby("xy_bucket").detection_difference.mean()
-    log_ratio = s2.groupby("xy_bucket").log_10_detection_ratio.mean()
-    n_cells = s2.groupby("xy_bucket").x_centroid.count()
-
-    bin_image_z_score = np.zeros([n_bins, n_bins])
-    bin_image_difference = np.zeros([n_bins, n_bins])
-    bin_image_ratio = np.zeros([n_bins, n_bins])
-    bin_image_counts = np.zeros([n_bins, n_bins])
-
-    extent = [np.min(binx), np.max(binx), np.min(biny), np.max(biny)]
-    for coord in binx.index:
-        bin_image_z_score[coord[1], coord[0]] = z_score[coord]
-        bin_image_difference[coord[1], coord[0]] = difference[coord]
-        bin_image_ratio[coord[1], coord[0]] = log_ratio[coord]
-        bin_image_counts[coord[1], coord[0]] = n_cells[coord]
+    if not use_kde:
+        (
+            extent,
+            bin_image_z_score,
+            bin_image_difference,
+            bin_image_ratio,
+            bin_image_counts
+        ) = spatial_detection_score_binned(s2, n_bins)
+    else:
+        (
+            extent,
+            bin_image_z_score,
+            bin_image_difference,
+            bin_image_ratio,
+            bin_image_counts
+        ) = spatial_detection_score_kde(s2)
 
     if plot_stuff:
         if non_spatial:
@@ -178,14 +261,13 @@ def spatial_detection_scores(
                 min_maxes[plot_name][0],
                 extent=extent,
                 cmap="coolwarm_r",
-                vmin=min_maxes[plot_name][1][0],
-                vmax=min_maxes[plot_name][1][1],
+                #vmin=min_maxes[plot_name][1][0],
+                #vmax=min_maxes[plot_name][1][1],
             )
             fig.colorbar(pcm, ax=ax, shrink=0.7)
             ax.set_title(query_name + "\n" + plot_name)
 
     if in_place:
-
         return dict(
             z_score_image=bin_image_z_score,
             difference_image=bin_image_difference,

From 8dd9152b00b57b3fe3f7302ba1913d7870685132 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Tue, 10 Dec 2024 21:49:50 +0000
Subject: [PATCH 02/12] black, missed comment

---
 spatial_compare/utils.py | 74 ++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py
index f6828c8..810df4a 100644
--- a/spatial_compare/utils.py
+++ b/spatial_compare/utils.py
@@ -5,6 +5,7 @@
 import seaborn as sns
 import scipy as sp
 
+
 def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None):
     """
     Calculate the mean expression of observations grouped by a specified key.
@@ -47,55 +48,49 @@ def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None):
 
     return out
 
-def spatial_detection_score_kde(
-    query: pd.DataFrame,
-    grid_out: int = 100j
-):
+
+def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100j):
     cell_x = query["x_centroid"]
     cell_y = query["y_centroid"]
-    
+
     xmin, xmax = cell_x.min(), cell_x.max()
     ymin, ymax = cell_y.min(), cell_y.max()
     extent = [xmin, xmax, ymin, ymax]
-    dmax = (xmax-xmin)**2 + (ymax-ymin)**2
-    
+    dmax = (xmax - xmin) ** 2 + (ymax - ymin) ** 2
+
     X, Y = np.mgrid[xmin:xmax:grid_out, ymin:ymax:grid_out]
     positions = np.vstack([X.ravel(), Y.ravel()])
-    
-    cell_coords = np.vstack(
-        [
-            cell_x.values,
-            cell_y.values
-        ]
-    );
-
-    scores = [];
-    for column in ["detection_relative_z_score", "detection_difference", "log_10_detection_ratio"]:
+
+    cell_coords = np.vstack([cell_x.values, cell_y.values])
+
+    scores = []
+    for column in [
+        "detection_relative_z_score",
+        "detection_difference",
+        "log_10_detection_ratio",
+    ]:
         weights = np.abs(query[column].values)
         weights[np.isnan(weights)] = 0
-        #weights = weights + abs(weights.min())
+        # weights = weights + abs(weights.min())
 
         kde = sp.stats.gaussian_kde(
             cell_coords,
-            weights=weights
+            # weights=weights
         )(positions).T
-        #Z = np.reshape(kde, X.shape).T
+        # Z = np.reshape(kde, X.shape).T
 
         output = np.zeros(kde.shape)
         for i, z in enumerate(kde):
             coord = positions[:, i]
-            output[i] = z * np.sum(np.sqrt(dmax - np.square(cell_coords.T - coord).sum(axis=1)) * weights)
+            output[i] = z * np.sum(
+                np.sqrt(dmax - np.square(cell_coords.T - coord).sum(axis=1)) * weights
+            )
         Z = np.reshape(output, X.shape).T
-        
+
         scores.append(Z)
 
-    return (
-        extent,
-        scores[0],
-        scores[1],
-        scores[2],
-        np.ones(Z.shape)
-    )
+    return (extent, scores[0], scores[1], scores[2], np.ones(Z.shape))
+
 
 def spatial_detection_score_binned(
     query: pd.DataFrame,
@@ -133,9 +128,10 @@ def spatial_detection_score_binned(
         bin_image_z_score,
         bin_image_difference,
         bin_image_ratio,
-        bin_image_counts
+        bin_image_counts,
     )
 
+
 def spatial_detection_scores(
     reference: pd.DataFrame,
     query: pd.DataFrame,
@@ -175,7 +171,9 @@ def spatial_detection_scores(
         len(shared_category_values) < query[category].unique().shape[0]
         or len(shared_category_values) < reference[category].unique().shape[0]
     ):
-        print("Query and reference datasets had different shapes. Objects will not be modified in place")
+        print(
+            "Query and reference datasets had different shapes. Objects will not be modified in place"
+        )
         in_place = False
 
     if in_place:
@@ -203,14 +201,16 @@ def spatial_detection_scores(
             continue
 
         indices = s2[category] == c
-        
+
         s2.loc[indices, ["detection_relative_z_score"]] = (
             (s2.loc[indices, [comparison_column]] - means[c]) / stds[c]
         ).values
         s2.loc[indices, ["detection_difference"]] = (
             s2.loc[indices, [comparison_column]] - means[c]
         ).values
-        s2.loc[indices, ["detection_ratio"]] = (s2.loc[indices, [comparison_column]] / means[c]).values
+        s2.loc[indices, ["detection_ratio"]] = (
+            s2.loc[indices, [comparison_column]] / means[c]
+        ).values
         s2.loc[indices, ["log_10_detection_ratio"]] = np.log10(
             s2.loc[indices, ["detection_ratio"]].values
         )
@@ -221,7 +221,7 @@ def spatial_detection_scores(
             bin_image_z_score,
             bin_image_difference,
             bin_image_ratio,
-            bin_image_counts
+            bin_image_counts,
         ) = spatial_detection_score_binned(s2, n_bins)
     else:
         (
@@ -229,7 +229,7 @@ def spatial_detection_scores(
             bin_image_z_score,
             bin_image_difference,
             bin_image_ratio,
-            bin_image_counts
+            bin_image_counts,
         ) = spatial_detection_score_kde(s2)
 
     if plot_stuff:
@@ -261,8 +261,8 @@ def spatial_detection_scores(
                 min_maxes[plot_name][0],
                 extent=extent,
                 cmap="coolwarm_r",
-                #vmin=min_maxes[plot_name][1][0],
-                #vmax=min_maxes[plot_name][1][1],
+                # vmin=min_maxes[plot_name][1][0],
+                # vmax=min_maxes[plot_name][1][1],
             )
             fig.colorbar(pcm, ax=ax, shrink=0.7)
             ax.set_title(query_name + "\n" + plot_name)

From fb16fca326c7ae206403d440386cac6dee9634dd Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Tue, 10 Dec 2024 23:25:45 +0000
Subject: [PATCH 03/12] kernel-mean-estimator

---
 spatial_compare/utils.py | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py
index 810df4a..5f0fa57 100644
--- a/spatial_compare/utils.py
+++ b/spatial_compare/utils.py
@@ -49,7 +49,7 @@ def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None):
     return out
 
 
-def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100j):
+def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 20):
     cell_x = query["x_centroid"]
     cell_y = query["y_centroid"]
 
@@ -71,21 +71,18 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100j):
     ]:
         weights = np.abs(query[column].values)
         weights[np.isnan(weights)] = 0
-        # weights = weights + abs(weights.min())
+        weights = weights + abs(weights.min())
 
-        kde = sp.stats.gaussian_kde(
+        kde_weighted = sp.stats.gaussian_kde(
             cell_coords,
-            # weights=weights
+            weights=weights
         )(positions).T
-        # Z = np.reshape(kde, X.shape).T
-
-        output = np.zeros(kde.shape)
-        for i, z in enumerate(kde):
-            coord = positions[:, i]
-            output[i] = z * np.sum(
-                np.sqrt(dmax - np.square(cell_coords.T - coord).sum(axis=1)) * weights
-            )
-        Z = np.reshape(output, X.shape).T
+        
+        kde_unweighted = sp.stats.gaussian_kde(
+            cell_coords,
+        )(positions).T
+        
+        Z = np.reshape(kde_weighted / kde_unweighted, X.shape).T
 
         scores.append(Z)
 
@@ -230,7 +227,7 @@ def spatial_detection_scores(
             bin_image_difference,
             bin_image_ratio,
             bin_image_counts,
-        ) = spatial_detection_score_kde(s2)
+        ) = spatial_detection_score_kde(s2, n_bins)
 
     if plot_stuff:
         if non_spatial:

From 14c7e876c17bf5640c00ce7009ce6572e30ba415 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 11 Dec 2024 02:43:04 +0000
Subject: [PATCH 04/12] log after computing

---
 spatial_compare/utils.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py
index 5f0fa57..4e9819c 100644
--- a/spatial_compare/utils.py
+++ b/spatial_compare/utils.py
@@ -49,7 +49,7 @@ def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None):
     return out
 
 
-def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 20):
+def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100):
     cell_x = query["x_centroid"]
     cell_y = query["y_centroid"]
 
@@ -58,7 +58,9 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 20):
     extent = [xmin, xmax, ymin, ymax]
     dmax = (xmax - xmin) ** 2 + (ymax - ymin) ** 2
 
-    X, Y = np.mgrid[xmin:xmax:grid_out, ymin:ymax:grid_out]
+    X, Y = np.mgrid[
+        xmin : xmax : complex(0, grid_out), ymin : ymax : complex(0, grid_out)
+    ]
     positions = np.vstack([X.ravel(), Y.ravel()])
 
     cell_coords = np.vstack([cell_x.values, cell_y.values])
@@ -67,26 +69,23 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 20):
     for column in [
         "detection_relative_z_score",
         "detection_difference",
-        "log_10_detection_ratio",
+        "detection_ratio",
     ]:
         weights = np.abs(query[column].values)
         weights[np.isnan(weights)] = 0
         weights = weights + abs(weights.min())
 
-        kde_weighted = sp.stats.gaussian_kde(
-            cell_coords,
-            weights=weights
-        )(positions).T
-        
+        kde_weighted = sp.stats.gaussian_kde(cell_coords, weights=weights)(positions).T
+
         kde_unweighted = sp.stats.gaussian_kde(
             cell_coords,
         )(positions).T
-        
+
         Z = np.reshape(kde_weighted / kde_unweighted, X.shape).T
 
         scores.append(Z)
 
-    return (extent, scores[0], scores[1], scores[2], np.ones(Z.shape))
+    return (extent, scores[0], scores[1], np.log10(scores[2]), np.ones(Z.shape))
 
 
 def spatial_detection_score_binned(
@@ -153,7 +152,7 @@ def spatial_detection_scores(
         n_bins (int, optional): The number of bins for spatial grouping. Defaults to 50.
         in_place (bool, optional): Whether to modify the query data in place. Defaults to True.
         non_spatial (bool, optional): Whether to compare to an ungrouped mean/std. Defaults to False.
-        use_kde (bool, optional): Whether to use kernel-density estimates or not. If not, bins into `n_bins` instead. Defaults to False.
+        use_kde (bool, optional): Whether to use kernel-density estimates instead of taking binned averages. Defaults to False.
 
     Returns:
         dict: A dictionary containing the bin image, extent, query data, and reference data (if in_place is False).

From 742d10607177dd1932b541268af61633b90d740a Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 11 Dec 2024 03:07:30 +0000
Subject: [PATCH 05/12] fix rogue abs

---
 spatial_compare/spatial_compare.py |  9 ---------
 spatial_compare/utils.py           | 15 +++++++++------
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/spatial_compare/spatial_compare.py b/spatial_compare/spatial_compare.py
index dc11a8d..a06b442 100644
--- a/spatial_compare/spatial_compare.py
+++ b/spatial_compare/spatial_compare.py
@@ -493,9 +493,7 @@ def compare_expression(
                     [np.min(means_0), np.max(means_0)],
                     "--",
                 )
-        print(gene_ratio_dfs.keys())
         if len(gene_ratio_dfs.keys()) > 0:
-
             gene_ratio_df = pd.concat(gene_ratio_dfs, axis=1)
         else:
             gene_ratio_df = None
@@ -506,13 +504,11 @@ def compare_expression(
         }
 
     def plot_detection_ratio(self, gene_ratio_dataframe, figsize=[15, 15]):
-
         detection_ratio_plots(
             gene_ratio_dataframe, data_names=self.data_names, figsize=figsize
         )
 
     def spatial_compare(self, **kwargs):
-
         if "category" in kwargs.keys():
             self.set_category(kwargs["category"])
 
@@ -834,7 +830,6 @@ def filter_and_cluster_twice(
     if isinstance(input_ad.X, sparse.csr.csr_matrix):
         input_ad.X = input_ad.X.toarray()
 
-    print("converted to array  ")
     low_detection_genes = input_ad.var.iloc[
         np.nonzero(np.max(input_ad.X, axis=0) <= min_max_counts)
     ].gene
@@ -856,7 +851,6 @@ def filter_and_cluster_twice(
     ]
 
     # Normalizing to median total counts
-    print("normalizing total counts")
     sc.pp.normalize_total(to_cluster)
     # Logarithmize the data
     sc.pp.log1p(to_cluster)
@@ -879,11 +873,9 @@ def filter_and_cluster_twice(
 
     # per cluster, repeat PCA and clustering...
     # this duplicates the data! but it's necessary because the scanpy functions would create a copy of the subset anyway(?)
-    print("2nd round of clustering")
     all_subs = []
     for cl in to_cluster.obs.leiden_0.unique():
         subcopy = to_cluster[to_cluster.obs.leiden_0 == cl, :].copy()
-        print(subcopy.shape)
         sc.tl.pca(subcopy, n_comps=n_pcs[1])
         sc.pp.neighbors(subcopy)
         sc.tl.leiden(subcopy, n_iterations=n_iterations[1])
@@ -895,7 +887,6 @@ def filter_and_cluster_twice(
         ]
 
         all_subs.append(subcopy)
-    print("concatenating")
     iterative_clusters_ad = ad.concat(all_subs)
 
     return iterative_clusters_ad
diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py
index 4e9819c..7809741 100644
--- a/spatial_compare/utils.py
+++ b/spatial_compare/utils.py
@@ -56,7 +56,6 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100):
     xmin, xmax = cell_x.min(), cell_x.max()
     ymin, ymax = cell_y.min(), cell_y.max()
     extent = [xmin, xmax, ymin, ymax]
-    dmax = (xmax - xmin) ** 2 + (ymax - ymin) ** 2
 
     X, Y = np.mgrid[
         xmin : xmax : complex(0, grid_out), ymin : ymax : complex(0, grid_out)
@@ -69,10 +68,13 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100):
     for column in [
         "detection_relative_z_score",
         "detection_difference",
-        "detection_ratio",
+        "log_10_detection_ratio",
     ]:
-        weights = np.abs(query[column].values)
+        weights = query[column].values
         weights[np.isnan(weights)] = 0
+
+        wext = [weights.min(), weights.max()]
+        
         weights = weights + abs(weights.min())
 
         kde_weighted = sp.stats.gaussian_kde(cell_coords, weights=weights)(positions).T
@@ -82,10 +84,11 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100):
         )(positions).T
 
         Z = np.reshape(kde_weighted / kde_unweighted, X.shape).T
+        Z = (Z-Z.min())/(Z.max()-Z.min()) * (wext[1] - wext[0]) + wext[0]
 
         scores.append(Z)
 
-    return (extent, scores[0], scores[1], np.log10(scores[2]), np.ones(Z.shape))
+    return (extent, scores[0], scores[1], scores[2], np.ones(Z.shape))
 
 
 def spatial_detection_score_binned(
@@ -257,8 +260,8 @@ def spatial_detection_scores(
                 min_maxes[plot_name][0],
                 extent=extent,
                 cmap="coolwarm_r",
-                # vmin=min_maxes[plot_name][1][0],
-                # vmax=min_maxes[plot_name][1][1],
+                #vmin=min_maxes[plot_name][1][0],
+                #vmax=min_maxes[plot_name][1][1],
             )
             fig.colorbar(pcm, ax=ax, shrink=0.7)
             ax.set_title(query_name + "\n" + plot_name)

From 758894b88eb071a72d123319b827f49b2c27ae7b Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 11 Dec 2024 03:07:42 +0000
Subject: [PATCH 06/12] black

---
 spatial_compare/utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py
index 7809741..4e64433 100644
--- a/spatial_compare/utils.py
+++ b/spatial_compare/utils.py
@@ -74,7 +74,7 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100):
         weights[np.isnan(weights)] = 0
 
         wext = [weights.min(), weights.max()]
-        
+
         weights = weights + abs(weights.min())
 
         kde_weighted = sp.stats.gaussian_kde(cell_coords, weights=weights)(positions).T
@@ -84,7 +84,7 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100):
         )(positions).T
 
         Z = np.reshape(kde_weighted / kde_unweighted, X.shape).T
-        Z = (Z-Z.min())/(Z.max()-Z.min()) * (wext[1] - wext[0]) + wext[0]
+        Z = (Z - Z.min()) / (Z.max() - Z.min()) * (wext[1] - wext[0]) + wext[0]
 
         scores.append(Z)
 
@@ -260,8 +260,8 @@ def spatial_detection_scores(
                 min_maxes[plot_name][0],
                 extent=extent,
                 cmap="coolwarm_r",
-                #vmin=min_maxes[plot_name][1][0],
-                #vmax=min_maxes[plot_name][1][1],
+                # vmin=min_maxes[plot_name][1][0],
+                # vmax=min_maxes[plot_name][1][1],
             )
             fig.colorbar(pcm, ax=ax, shrink=0.7)
             ax.set_title(query_name + "\n" + plot_name)

From f2be0cc1f68e3865ac9a32a0229137723e1bdda6 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 11 Dec 2024 21:15:19 +0000
Subject: [PATCH 07/12] output mask, and estimator, and evaluate on cels

---
 spatial_compare/utils.py | 105 +++++++++++++++++++++++----------------
 1 file changed, 63 insertions(+), 42 deletions(-)

diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py
index 4e64433..021dc0f 100644
--- a/spatial_compare/utils.py
+++ b/spatial_compare/utils.py
@@ -52,17 +52,19 @@ def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None):
 def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100):
     cell_x = query["x_centroid"]
     cell_y = query["y_centroid"]
+    cell_coords = np.vstack([cell_x.values, cell_y.values])
 
     xmin, xmax = cell_x.min(), cell_x.max()
     ymin, ymax = cell_y.min(), cell_y.max()
     extent = [xmin, xmax, ymin, ymax]
 
-    X, Y = np.mgrid[
-        xmin : xmax : complex(0, grid_out), ymin : ymax : complex(0, grid_out)
-    ]
-    positions = np.vstack([X.ravel(), Y.ravel()])
-
-    cell_coords = np.vstack([cell_x.values, cell_y.values])
+    if grid_out == 0:
+        positions = cell_coords
+    else:
+        X, Y = np.mgrid[
+            xmin : xmax : complex(0, grid_out), ymin : ymax : complex(0, grid_out)
+        ]
+        positions = np.vstack([X.ravel(), Y.ravel()])
 
     scores = []
     for column in [
@@ -77,18 +79,22 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100):
 
         weights = weights + abs(weights.min())
 
-        kde_weighted = sp.stats.gaussian_kde(cell_coords, weights=weights)(positions).T
-
+        kde_weighted = sp.stats.gaussian_kde(cell_coords, weights=weights)
         kde_unweighted = sp.stats.gaussian_kde(
             cell_coords,
-        )(positions).T
+        )
+
+        if column == "detection_relative_z_score":
+            estimator = lambda positions: kde_weighted(positions).T / kde_unweighted(positions).T
 
-        Z = np.reshape(kde_weighted / kde_unweighted, X.shape).T
+        Z = kde_weighted(positions).T / kde_unweighted(positions).T
         Z = (Z - Z.min()) / (Z.max() - Z.min()) * (wext[1] - wext[0]) + wext[0]
+        if grid_out != 0:
+            Z = np.reshape(Z, X.shape).T
 
         scores.append(Z)
 
-    return (extent, scores[0], scores[1], scores[2], np.ones(Z.shape))
+    return (estimator, extent, scores[0], scores[1], scores[2])
 
 
 def spatial_detection_score_binned(
@@ -134,7 +140,7 @@ def spatial_detection_score_binned(
 def spatial_detection_scores(
     reference: pd.DataFrame,
     query: pd.DataFrame,
-    plot_stuff=True,
+    plot_stuff: bool = True,
     query_name: str = "query data",
     comparison_column: str = "transcript_counts",
     category: str = "supercluster_name",
@@ -142,6 +148,7 @@ def spatial_detection_scores(
     in_place: bool = True,
     non_spatial: bool = False,
     use_kde: bool = False,
+    mask: float = 0.0
 ):
     """
     Calculate and plot spatial detection scores for query data compared to reference data.
@@ -155,7 +162,7 @@ def spatial_detection_scores(
         n_bins (int, optional): The number of bins for spatial grouping. Defaults to 50.
         in_place (bool, optional): Whether to modify the query data in place. Defaults to True.
         non_spatial (bool, optional): Whether to compare to an ungrouped mean/std. Defaults to False.
-        use_kde (bool, optional): Whether to use kernel-density estimates instead of taking binned averages. Defaults to False.
+        use_kde (bool, optional): Whether to use kernel-density estimates instead of taking binned averages. Samples the KDE on a `n_bins` square grid for plotting, unless `n_bins == 0` (in which case it will be sampled at the cell coordinates). Defaults to False.
 
     Returns:
         dict: A dictionary containing the bin image, extent, query data, and reference data (if in_place is False).
@@ -220,16 +227,24 @@ def spatial_detection_scores(
             bin_image_z_score,
             bin_image_difference,
             bin_image_ratio,
-            bin_image_counts,
+            bin_image_counts
         ) = spatial_detection_score_binned(s2, n_bins)
     else:
         (
+            estimator,
             extent,
             bin_image_z_score,
             bin_image_difference,
             bin_image_ratio,
-            bin_image_counts,
         ) = spatial_detection_score_kde(s2, n_bins)
+        # FIXME: not computing this
+        bin_image_counts = np.zeros(bin_image_ratio.shape)
+
+    if mask != 0.0:
+        bin_image_z_score = bin_image_z_score > np.quantile(bin_image_z_score, mask)
+        bin_image_difference = bin_image_difference > np.quantile(bin_image_difference, mask)
+        bin_image_ratio = bin_image_ratio > np.quantile(bin_image_ratio, mask)
+        bin_image_counts = bin_image_counts > np.quantile(bin_image_counts, mask)
 
     if plot_stuff:
         if non_spatial:
@@ -256,38 +271,44 @@ def spatial_detection_scores(
             )
         for ii, plot_name in enumerate(min_maxes.keys()):
             ax = axs[ii]
-            pcm = ax.imshow(
-                min_maxes[plot_name][0],
-                extent=extent,
-                cmap="coolwarm_r",
-                # vmin=min_maxes[plot_name][1][0],
-                # vmax=min_maxes[plot_name][1][1],
-            )
+
+            if n_bins != 0:
+                pcm = ax.imshow(
+                    min_maxes[plot_name][0],
+                    extent=extent,
+                    cmap="coolwarm_r",
+                    # vmin=min_maxes[plot_name][1][0],
+                    # vmax=min_maxes[plot_name][1][1],
+                )
+            else:
+                pcm = ax.scatter(
+                    s2.x_centroid.values,
+                    -s2.y_centroid.values,
+                    c=min_maxes[plot_name][0],
+                    cmap="coolwarm_r",
+                )
             fig.colorbar(pcm, ax=ax, shrink=0.7)
             ax.set_title(query_name + "\n" + plot_name)
 
-    if in_place:
-        return dict(
-            z_score_image=bin_image_z_score,
-            difference_image=bin_image_difference,
-            ratio_image=bin_image_ratio,
-            extent=extent,
-            count_image=bin_image_counts,
-            query=True,
-            reference=True,
-        )
+    ret = dict(
+        z_score_image=bin_image_z_score,
+        difference_image=bin_image_difference,
+        ratio_image=bin_image_ratio,
+        extent=extent,
+        count_image=bin_image_counts,
+        query=True,
+        reference=True,
+    )
 
+    if use_kde:
+        ret["z_score_estimator"] = estimator
+    
+    if in_place:
+        return ret
     else:
-        return dict(
-            z_score_image=bin_image_z_score,
-            difference_image=bin_image_difference,
-            ratio_image=bin_image_ratio,
-            extent=extent,
-            count_image=bin_image_counts,
-            query=s2,
-            reference=s1,
-        )
-
+        ret["query"] = s2
+        ret["reference"] = s1
+        return ret
 
 def summarize_and_plot(
     spatial_density_results,

From 2d33963d220a085ce4ea950570e34d10398213c2 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 11 Dec 2024 21:16:16 +0000
Subject: [PATCH 08/12] black

---
 spatial_compare/utils.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py
index 021dc0f..19ac490 100644
--- a/spatial_compare/utils.py
+++ b/spatial_compare/utils.py
@@ -85,7 +85,10 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100):
         )
 
         if column == "detection_relative_z_score":
-            estimator = lambda positions: kde_weighted(positions).T / kde_unweighted(positions).T
+            estimator = (
+                lambda positions: kde_weighted(positions).T
+                / kde_unweighted(positions).T
+            )
 
         Z = kde_weighted(positions).T / kde_unweighted(positions).T
         Z = (Z - Z.min()) / (Z.max() - Z.min()) * (wext[1] - wext[0]) + wext[0]
@@ -148,7 +151,7 @@ def spatial_detection_scores(
     in_place: bool = True,
     non_spatial: bool = False,
     use_kde: bool = False,
-    mask: float = 0.0
+    mask: float = 0.0,
 ):
     """
     Calculate and plot spatial detection scores for query data compared to reference data.
@@ -227,7 +230,7 @@ def spatial_detection_scores(
             bin_image_z_score,
             bin_image_difference,
             bin_image_ratio,
-            bin_image_counts
+            bin_image_counts,
         ) = spatial_detection_score_binned(s2, n_bins)
     else:
         (
@@ -242,7 +245,9 @@ def spatial_detection_scores(
 
     if mask != 0.0:
         bin_image_z_score = bin_image_z_score > np.quantile(bin_image_z_score, mask)
-        bin_image_difference = bin_image_difference > np.quantile(bin_image_difference, mask)
+        bin_image_difference = bin_image_difference > np.quantile(
+            bin_image_difference, mask
+        )
         bin_image_ratio = bin_image_ratio > np.quantile(bin_image_ratio, mask)
         bin_image_counts = bin_image_counts > np.quantile(bin_image_counts, mask)
 
@@ -302,7 +307,7 @@ def spatial_detection_scores(
 
     if use_kde:
         ret["z_score_estimator"] = estimator
-    
+
     if in_place:
         return ret
     else:
@@ -310,6 +315,7 @@ def spatial_detection_scores(
         ret["reference"] = s1
         return ret
 
+
 def summarize_and_plot(
     spatial_density_results,
     min_cells_per_bin=50,

From 83ee279a695296976603c61e42abcd7f389a164b Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 11 Dec 2024 21:25:21 +0000
Subject: [PATCH 09/12] black/white mask

---
 spatial_compare/utils.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py
index 19ac490..1752469 100644
--- a/spatial_compare/utils.py
+++ b/spatial_compare/utils.py
@@ -277,11 +277,16 @@ def spatial_detection_scores(
         for ii, plot_name in enumerate(min_maxes.keys()):
             ax = axs[ii]
 
+            if mask != 0.0:
+                cmap = "Greys"
+            else:
+                cmap = "woolwarm_r"
+
             if n_bins != 0:
                 pcm = ax.imshow(
                     min_maxes[plot_name][0],
                     extent=extent,
-                    cmap="coolwarm_r",
+                    cmap=cmap,
                     # vmin=min_maxes[plot_name][1][0],
                     # vmax=min_maxes[plot_name][1][1],
                 )
@@ -290,7 +295,7 @@ def spatial_detection_scores(
                     s2.x_centroid.values,
                     -s2.y_centroid.values,
                     c=min_maxes[plot_name][0],
-                    cmap="coolwarm_r",
+                    cmap=cmap,
                 )
             fig.colorbar(pcm, ax=ax, shrink=0.7)
             ax.set_title(query_name + "\n" + plot_name)

From ad4396256f2dbb2781d5b16daff7f44cde16272c Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 11 Dec 2024 21:28:07 +0000
Subject: [PATCH 10/12] fix

---
 spatial_compare/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py
index 1752469..d8eb585 100644
--- a/spatial_compare/utils.py
+++ b/spatial_compare/utils.py
@@ -280,7 +280,7 @@ def spatial_detection_scores(
             if mask != 0.0:
                 cmap = "Greys"
             else:
-                cmap = "woolwarm_r"
+                cmap = "coolwarm_r"
 
             if n_bins != 0:
                 pcm = ax.imshow(

From 6d2daf6ea3af38c6d0cf2e5aa10bc336c31678cc Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 11 Dec 2024 21:31:37 +0000
Subject: [PATCH 11/12] document

---
 spatial_compare/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py
index d8eb585..7beb6d7 100644
--- a/spatial_compare/utils.py
+++ b/spatial_compare/utils.py
@@ -166,6 +166,7 @@ def spatial_detection_scores(
         in_place (bool, optional): Whether to modify the query data in place. Defaults to True.
         non_spatial (bool, optional): Whether to compare to an ungrouped mean/std. Defaults to False.
         use_kde (bool, optional): Whether to use kernel-density estimates instead of taking binned averages. Samples the KDE on a `n_bins` square grid for plotting, unless `n_bins == 0` (in which case it will be sampled at the cell coordinates). Defaults to False.
+        mask (float, optional): A quantile at which to create binary masks from. Defaults to 0.0 (do not binarize).
 
     Returns:
         dict: A dictionary containing the bin image, extent, query data, and reference data (if in_place is False).

From ac77c3046b396ef2feb851824c176d98595e47d8 Mon Sep 17 00:00:00 2001
From: Brian Long <berl@users.noreply.github.com>
Date: Thu, 12 Dec 2024 15:34:56 -0800
Subject: [PATCH 12/12] add back fixed z-score range for plotting

---
 spatial_compare/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py
index 7beb6d7..1d7aefa 100644
--- a/spatial_compare/utils.py
+++ b/spatial_compare/utils.py
@@ -258,7 +258,7 @@ def spatial_detection_scores(
         else:
             title_string = "Spatial Detection Scores"
         min_maxes = {
-            "detection z-score": [bin_image_z_score, [-1, 1]],
+            "detection z-score": [bin_image_z_score, [-2, 2]],
             "total counts difference": [bin_image_difference, [-100, 100]],
             "log10(detection ratio)": [bin_image_ratio, [-1, 1]],
         }
@@ -288,8 +288,8 @@ def spatial_detection_scores(
                     min_maxes[plot_name][0],
                     extent=extent,
                     cmap=cmap,
-                    # vmin=min_maxes[plot_name][1][0],
-                    # vmax=min_maxes[plot_name][1][1],
+                    vmin=min_maxes[plot_name][1][0],
+                    vmax=min_maxes[plot_name][1][1],
                 )
             else:
                 pcm = ax.scatter(