From e6110b9c9e53d197d912c707042058e3c089d756 Mon Sep 17 00:00:00 2001 From: Jordan Sicherman Date: Tue, 10 Dec 2024 21:38:12 +0000 Subject: [PATCH 01/12] thinking about KDE implementation --- spatial_compare/spatial_compare.py | 1 - spatial_compare/utils.py | 170 +++++++++++++++++++++-------- 2 files changed, 126 insertions(+), 45 deletions(-) diff --git a/spatial_compare/spatial_compare.py b/spatial_compare/spatial_compare.py index 30e2f31..dc11a8d 100644 --- a/spatial_compare/spatial_compare.py +++ b/spatial_compare/spatial_compare.py @@ -14,7 +14,6 @@ from spatial_compare.utils import grouped_obs_mean - DEFAULT_DATA_NAMES = ["Data 0", "Data 1"] diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py index 1e4ca02..f6828c8 100644 --- a/spatial_compare/utils.py +++ b/spatial_compare/utils.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd import seaborn as sns - +import scipy as sp def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None): """ @@ -47,17 +47,106 @@ def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None): return out +def spatial_detection_score_kde( + query: pd.DataFrame, + grid_out: int = 100j +): + cell_x = query["x_centroid"] + cell_y = query["y_centroid"] + + xmin, xmax = cell_x.min(), cell_x.max() + ymin, ymax = cell_y.min(), cell_y.max() + extent = [xmin, xmax, ymin, ymax] + dmax = (xmax-xmin)**2 + (ymax-ymin)**2 + + X, Y = np.mgrid[xmin:xmax:grid_out, ymin:ymax:grid_out] + positions = np.vstack([X.ravel(), Y.ravel()]) + + cell_coords = np.vstack( + [ + cell_x.values, + cell_y.values + ] + ); + + scores = []; + for column in ["detection_relative_z_score", "detection_difference", "log_10_detection_ratio"]: + weights = np.abs(query[column].values) + weights[np.isnan(weights)] = 0 + #weights = weights + abs(weights.min()) + + kde = sp.stats.gaussian_kde( + cell_coords, + weights=weights + )(positions).T + #Z = np.reshape(kde, X.shape).T + + output = np.zeros(kde.shape) + for i, z in enumerate(kde): + coord = positions[:, i] + output[i] = z * np.sum(np.sqrt(dmax - np.square(cell_coords.T - coord).sum(axis=1)) * weights) + Z = np.reshape(output, X.shape).T + + scores.append(Z) + + return ( + extent, + scores[0], + scores[1], + scores[2], + np.ones(Z.shape) + ) + +def spatial_detection_score_binned( + query: pd.DataFrame, + n_bins: int = 50, +): + query["xy_bucket"] = list( + zip( + pd.cut(query.x_centroid, n_bins, labels=list(range(n_bins))), + pd.cut(query.y_centroid, n_bins, labels=list(range(n_bins))), + ) + ) + + binx = query.groupby("xy_bucket").x_centroid.mean() + biny = query.groupby("xy_bucket").y_centroid.mean() + + z_score = query.groupby("xy_bucket").detection_relative_z_score.mean() + difference = query.groupby("xy_bucket").detection_difference.mean() + log_ratio = query.groupby("xy_bucket").log_10_detection_ratio.mean() + n_cells = query.groupby("xy_bucket").x_centroid.count() + + bin_image_z_score = np.zeros([n_bins, n_bins]) + bin_image_difference = np.zeros([n_bins, n_bins]) + bin_image_ratio = np.zeros([n_bins, n_bins]) + bin_image_counts = np.zeros([n_bins, n_bins]) + + extent = [np.min(binx), np.max(binx), np.min(biny), np.max(biny)] + for coord in binx.index: + bin_image_z_score[coord[1], coord[0]] = z_score[coord] + bin_image_difference[coord[1], coord[0]] = difference[coord] + bin_image_ratio[coord[1], coord[0]] = log_ratio[coord] + bin_image_counts[coord[1], coord[0]] = n_cells[coord] + + return ( + extent, + bin_image_z_score, + bin_image_difference, + bin_image_ratio, + bin_image_counts + ) def spatial_detection_scores( reference: pd.DataFrame, query: pd.DataFrame, plot_stuff=True, query_name: str = "query data", - comparison_column="transcript_counts", - category="supercluster_name", - n_bins=50, - in_place=True, - non_spatial=False, + comparison_column: str = "transcript_counts", + category: str = "supercluster_name", + n_bins: int = 50, + in_place: bool = True, + non_spatial: bool = False, + use_kde: bool = False, ): """ Calculate and plot spatial detection scores for query data compared to reference data. @@ -71,22 +160,22 @@ def spatial_detection_scores( n_bins (int, optional): The number of bins for spatial grouping. Defaults to 50. in_place (bool, optional): Whether to modify the query data in place. Defaults to True. non_spatial (bool, optional): Whether to compare to an ungrouped mean/std. Defaults to False. + use_kde (bool, optional): Whether to use kernel-density estimates or not. If not, bins into `n_bins` instead. Defaults to False. Returns: dict: A dictionary containing the bin image, extent, query data, and reference data (if in_place is False). """ - # code goes here - if category not in reference.columns or category not in query.columns: raise ValueError("category " + category + " not in reference and query inputs") shared_category_values = list( set(reference[category].unique()) & set(query[category].unique()) ) - if ( + if in_place and ( len(shared_category_values) < query[category].unique().shape[0] or len(shared_category_values) < reference[category].unique().shape[0] ): + print("Query and reference datasets had different shapes. Objects will not be modified in place") in_place = False if in_place: @@ -107,47 +196,41 @@ def spatial_detection_scores( s2["detection_relative_z_score"] = 0.0 s2["detection_difference"] = 0.0 s2["detection_ratio"] = 0.0 + s2["log_10_detection_ratio"] = 0.0 for c, gb in s2.groupby(category, observed=True): if c not in shared_category_values: continue - s2.loc[s2[category] == c, ["detection_relative_z_score"]] = ( - (s2.loc[s2[category] == c, [comparison_column]] - means[c]) / stds[c] + indices = s2[category] == c + + s2.loc[indices, ["detection_relative_z_score"]] = ( + (s2.loc[indices, [comparison_column]] - means[c]) / stds[c] ).values - s2.loc[s2[category] == c, ["detection_difference"]] = ( - s2.loc[s2[category] == c, [comparison_column]] - means[c] + s2.loc[indices, ["detection_difference"]] = ( + s2.loc[indices, [comparison_column]] - means[c] ).values - s2.loc[s2[category] == c, ["log_10_detection_ratio"]] = np.log10( - (s2.loc[s2[category] == c, [comparison_column]] / means[c]).values - ) - - s2["xy_bucket"] = list( - zip( - pd.cut(s2.x_centroid, n_bins, labels=list(range(n_bins))), - pd.cut(s2.y_centroid, n_bins, labels=list(range(n_bins))), + s2.loc[indices, ["detection_ratio"]] = (s2.loc[indices, [comparison_column]] / means[c]).values + s2.loc[indices, ["log_10_detection_ratio"]] = np.log10( + s2.loc[indices, ["detection_ratio"]].values ) - ) - - binx = s2.groupby("xy_bucket").x_centroid.mean() - biny = s2.groupby("xy_bucket").y_centroid.mean() - z_score = s2.groupby("xy_bucket").detection_relative_z_score.mean() - difference = s2.groupby("xy_bucket").detection_difference.mean() - log_ratio = s2.groupby("xy_bucket").log_10_detection_ratio.mean() - n_cells = s2.groupby("xy_bucket").x_centroid.count() - - bin_image_z_score = np.zeros([n_bins, n_bins]) - bin_image_difference = np.zeros([n_bins, n_bins]) - bin_image_ratio = np.zeros([n_bins, n_bins]) - bin_image_counts = np.zeros([n_bins, n_bins]) - - extent = [np.min(binx), np.max(binx), np.min(biny), np.max(biny)] - for coord in binx.index: - bin_image_z_score[coord[1], coord[0]] = z_score[coord] - bin_image_difference[coord[1], coord[0]] = difference[coord] - bin_image_ratio[coord[1], coord[0]] = log_ratio[coord] - bin_image_counts[coord[1], coord[0]] = n_cells[coord] + if not use_kde: + ( + extent, + bin_image_z_score, + bin_image_difference, + bin_image_ratio, + bin_image_counts + ) = spatial_detection_score_binned(s2, n_bins) + else: + ( + extent, + bin_image_z_score, + bin_image_difference, + bin_image_ratio, + bin_image_counts + ) = spatial_detection_score_kde(s2) if plot_stuff: if non_spatial: @@ -178,14 +261,13 @@ def spatial_detection_scores( min_maxes[plot_name][0], extent=extent, cmap="coolwarm_r", - vmin=min_maxes[plot_name][1][0], - vmax=min_maxes[plot_name][1][1], + #vmin=min_maxes[plot_name][1][0], + #vmax=min_maxes[plot_name][1][1], ) fig.colorbar(pcm, ax=ax, shrink=0.7) ax.set_title(query_name + "\n" + plot_name) if in_place: - return dict( z_score_image=bin_image_z_score, difference_image=bin_image_difference, From 8dd9152b00b57b3fe3f7302ba1913d7870685132 Mon Sep 17 00:00:00 2001 From: Jordan Sicherman Date: Tue, 10 Dec 2024 21:49:50 +0000 Subject: [PATCH 02/12] black, missed comment --- spatial_compare/utils.py | 74 ++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py index f6828c8..810df4a 100644 --- a/spatial_compare/utils.py +++ b/spatial_compare/utils.py @@ -5,6 +5,7 @@ import seaborn as sns import scipy as sp + def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None): """ Calculate the mean expression of observations grouped by a specified key. @@ -47,55 +48,49 @@ def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None): return out -def spatial_detection_score_kde( - query: pd.DataFrame, - grid_out: int = 100j -): + +def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100j): cell_x = query["x_centroid"] cell_y = query["y_centroid"] - + xmin, xmax = cell_x.min(), cell_x.max() ymin, ymax = cell_y.min(), cell_y.max() extent = [xmin, xmax, ymin, ymax] - dmax = (xmax-xmin)**2 + (ymax-ymin)**2 - + dmax = (xmax - xmin) ** 2 + (ymax - ymin) ** 2 + X, Y = np.mgrid[xmin:xmax:grid_out, ymin:ymax:grid_out] positions = np.vstack([X.ravel(), Y.ravel()]) - - cell_coords = np.vstack( - [ - cell_x.values, - cell_y.values - ] - ); - - scores = []; - for column in ["detection_relative_z_score", "detection_difference", "log_10_detection_ratio"]: + + cell_coords = np.vstack([cell_x.values, cell_y.values]) + + scores = [] + for column in [ + "detection_relative_z_score", + "detection_difference", + "log_10_detection_ratio", + ]: weights = np.abs(query[column].values) weights[np.isnan(weights)] = 0 - #weights = weights + abs(weights.min()) + # weights = weights + abs(weights.min()) kde = sp.stats.gaussian_kde( cell_coords, - weights=weights + # weights=weights )(positions).T - #Z = np.reshape(kde, X.shape).T + # Z = np.reshape(kde, X.shape).T output = np.zeros(kde.shape) for i, z in enumerate(kde): coord = positions[:, i] - output[i] = z * np.sum(np.sqrt(dmax - np.square(cell_coords.T - coord).sum(axis=1)) * weights) + output[i] = z * np.sum( + np.sqrt(dmax - np.square(cell_coords.T - coord).sum(axis=1)) * weights + ) Z = np.reshape(output, X.shape).T - + scores.append(Z) - return ( - extent, - scores[0], - scores[1], - scores[2], - np.ones(Z.shape) - ) + return (extent, scores[0], scores[1], scores[2], np.ones(Z.shape)) + def spatial_detection_score_binned( query: pd.DataFrame, @@ -133,9 +128,10 @@ def spatial_detection_score_binned( bin_image_z_score, bin_image_difference, bin_image_ratio, - bin_image_counts + bin_image_counts, ) + def spatial_detection_scores( reference: pd.DataFrame, query: pd.DataFrame, @@ -175,7 +171,9 @@ def spatial_detection_scores( len(shared_category_values) < query[category].unique().shape[0] or len(shared_category_values) < reference[category].unique().shape[0] ): - print("Query and reference datasets had different shapes. Objects will not be modified in place") + print( + "Query and reference datasets had different shapes. Objects will not be modified in place" + ) in_place = False if in_place: @@ -203,14 +201,16 @@ def spatial_detection_scores( continue indices = s2[category] == c - + s2.loc[indices, ["detection_relative_z_score"]] = ( (s2.loc[indices, [comparison_column]] - means[c]) / stds[c] ).values s2.loc[indices, ["detection_difference"]] = ( s2.loc[indices, [comparison_column]] - means[c] ).values - s2.loc[indices, ["detection_ratio"]] = (s2.loc[indices, [comparison_column]] / means[c]).values + s2.loc[indices, ["detection_ratio"]] = ( + s2.loc[indices, [comparison_column]] / means[c] + ).values s2.loc[indices, ["log_10_detection_ratio"]] = np.log10( s2.loc[indices, ["detection_ratio"]].values ) @@ -221,7 +221,7 @@ def spatial_detection_scores( bin_image_z_score, bin_image_difference, bin_image_ratio, - bin_image_counts + bin_image_counts, ) = spatial_detection_score_binned(s2, n_bins) else: ( @@ -229,7 +229,7 @@ def spatial_detection_scores( bin_image_z_score, bin_image_difference, bin_image_ratio, - bin_image_counts + bin_image_counts, ) = spatial_detection_score_kde(s2) if plot_stuff: @@ -261,8 +261,8 @@ def spatial_detection_scores( min_maxes[plot_name][0], extent=extent, cmap="coolwarm_r", - #vmin=min_maxes[plot_name][1][0], - #vmax=min_maxes[plot_name][1][1], + # vmin=min_maxes[plot_name][1][0], + # vmax=min_maxes[plot_name][1][1], ) fig.colorbar(pcm, ax=ax, shrink=0.7) ax.set_title(query_name + "\n" + plot_name) From fb16fca326c7ae206403d440386cac6dee9634dd Mon Sep 17 00:00:00 2001 From: Jordan Sicherman Date: Tue, 10 Dec 2024 23:25:45 +0000 Subject: [PATCH 03/12] kernel-mean-estimator --- spatial_compare/utils.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py index 810df4a..5f0fa57 100644 --- a/spatial_compare/utils.py +++ b/spatial_compare/utils.py @@ -49,7 +49,7 @@ def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None): return out -def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100j): +def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 20): cell_x = query["x_centroid"] cell_y = query["y_centroid"] @@ -71,21 +71,18 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100j): ]: weights = np.abs(query[column].values) weights[np.isnan(weights)] = 0 - # weights = weights + abs(weights.min()) + weights = weights + abs(weights.min()) - kde = sp.stats.gaussian_kde( + kde_weighted = sp.stats.gaussian_kde( cell_coords, - # weights=weights + weights=weights )(positions).T - # Z = np.reshape(kde, X.shape).T - - output = np.zeros(kde.shape) - for i, z in enumerate(kde): - coord = positions[:, i] - output[i] = z * np.sum( - np.sqrt(dmax - np.square(cell_coords.T - coord).sum(axis=1)) * weights - ) - Z = np.reshape(output, X.shape).T + + kde_unweighted = sp.stats.gaussian_kde( + cell_coords, + )(positions).T + + Z = np.reshape(kde_weighted / kde_unweighted, X.shape).T scores.append(Z) @@ -230,7 +227,7 @@ def spatial_detection_scores( bin_image_difference, bin_image_ratio, bin_image_counts, - ) = spatial_detection_score_kde(s2) + ) = spatial_detection_score_kde(s2, n_bins) if plot_stuff: if non_spatial: From 14c7e876c17bf5640c00ce7009ce6572e30ba415 Mon Sep 17 00:00:00 2001 From: Jordan Sicherman Date: Wed, 11 Dec 2024 02:43:04 +0000 Subject: [PATCH 04/12] log after computing --- spatial_compare/utils.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py index 5f0fa57..4e9819c 100644 --- a/spatial_compare/utils.py +++ b/spatial_compare/utils.py @@ -49,7 +49,7 @@ def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None): return out -def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 20): +def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100): cell_x = query["x_centroid"] cell_y = query["y_centroid"] @@ -58,7 +58,9 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 20): extent = [xmin, xmax, ymin, ymax] dmax = (xmax - xmin) ** 2 + (ymax - ymin) ** 2 - X, Y = np.mgrid[xmin:xmax:grid_out, ymin:ymax:grid_out] + X, Y = np.mgrid[ + xmin : xmax : complex(0, grid_out), ymin : ymax : complex(0, grid_out) + ] positions = np.vstack([X.ravel(), Y.ravel()]) cell_coords = np.vstack([cell_x.values, cell_y.values]) @@ -67,26 +69,23 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 20): for column in [ "detection_relative_z_score", "detection_difference", - "log_10_detection_ratio", + "detection_ratio", ]: weights = np.abs(query[column].values) weights[np.isnan(weights)] = 0 weights = weights + abs(weights.min()) - kde_weighted = sp.stats.gaussian_kde( - cell_coords, - weights=weights - )(positions).T - + kde_weighted = sp.stats.gaussian_kde(cell_coords, weights=weights)(positions).T + kde_unweighted = sp.stats.gaussian_kde( cell_coords, )(positions).T - + Z = np.reshape(kde_weighted / kde_unweighted, X.shape).T scores.append(Z) - return (extent, scores[0], scores[1], scores[2], np.ones(Z.shape)) + return (extent, scores[0], scores[1], np.log10(scores[2]), np.ones(Z.shape)) def spatial_detection_score_binned( @@ -153,7 +152,7 @@ def spatial_detection_scores( n_bins (int, optional): The number of bins for spatial grouping. Defaults to 50. in_place (bool, optional): Whether to modify the query data in place. Defaults to True. non_spatial (bool, optional): Whether to compare to an ungrouped mean/std. Defaults to False. - use_kde (bool, optional): Whether to use kernel-density estimates or not. If not, bins into `n_bins` instead. Defaults to False. + use_kde (bool, optional): Whether to use kernel-density estimates instead of taking binned averages. Defaults to False. Returns: dict: A dictionary containing the bin image, extent, query data, and reference data (if in_place is False). From 742d10607177dd1932b541268af61633b90d740a Mon Sep 17 00:00:00 2001 From: Jordan Sicherman Date: Wed, 11 Dec 2024 03:07:30 +0000 Subject: [PATCH 05/12] fix rogue abs --- spatial_compare/spatial_compare.py | 9 --------- spatial_compare/utils.py | 15 +++++++++------ 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/spatial_compare/spatial_compare.py b/spatial_compare/spatial_compare.py index dc11a8d..a06b442 100644 --- a/spatial_compare/spatial_compare.py +++ b/spatial_compare/spatial_compare.py @@ -493,9 +493,7 @@ def compare_expression( [np.min(means_0), np.max(means_0)], "--", ) - print(gene_ratio_dfs.keys()) if len(gene_ratio_dfs.keys()) > 0: - gene_ratio_df = pd.concat(gene_ratio_dfs, axis=1) else: gene_ratio_df = None @@ -506,13 +504,11 @@ def compare_expression( } def plot_detection_ratio(self, gene_ratio_dataframe, figsize=[15, 15]): - detection_ratio_plots( gene_ratio_dataframe, data_names=self.data_names, figsize=figsize ) def spatial_compare(self, **kwargs): - if "category" in kwargs.keys(): self.set_category(kwargs["category"]) @@ -834,7 +830,6 @@ def filter_and_cluster_twice( if isinstance(input_ad.X, sparse.csr.csr_matrix): input_ad.X = input_ad.X.toarray() - print("converted to array ") low_detection_genes = input_ad.var.iloc[ np.nonzero(np.max(input_ad.X, axis=0) <= min_max_counts) ].gene @@ -856,7 +851,6 @@ def filter_and_cluster_twice( ] # Normalizing to median total counts - print("normalizing total counts") sc.pp.normalize_total(to_cluster) # Logarithmize the data sc.pp.log1p(to_cluster) @@ -879,11 +873,9 @@ def filter_and_cluster_twice( # per cluster, repeat PCA and clustering... # this duplicates the data! but it's necessary because the scanpy functions would create a copy of the subset anyway(?) - print("2nd round of clustering") all_subs = [] for cl in to_cluster.obs.leiden_0.unique(): subcopy = to_cluster[to_cluster.obs.leiden_0 == cl, :].copy() - print(subcopy.shape) sc.tl.pca(subcopy, n_comps=n_pcs[1]) sc.pp.neighbors(subcopy) sc.tl.leiden(subcopy, n_iterations=n_iterations[1]) @@ -895,7 +887,6 @@ def filter_and_cluster_twice( ] all_subs.append(subcopy) - print("concatenating") iterative_clusters_ad = ad.concat(all_subs) return iterative_clusters_ad diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py index 4e9819c..7809741 100644 --- a/spatial_compare/utils.py +++ b/spatial_compare/utils.py @@ -56,7 +56,6 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100): xmin, xmax = cell_x.min(), cell_x.max() ymin, ymax = cell_y.min(), cell_y.max() extent = [xmin, xmax, ymin, ymax] - dmax = (xmax - xmin) ** 2 + (ymax - ymin) ** 2 X, Y = np.mgrid[ xmin : xmax : complex(0, grid_out), ymin : ymax : complex(0, grid_out) @@ -69,10 +68,13 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100): for column in [ "detection_relative_z_score", "detection_difference", - "detection_ratio", + "log_10_detection_ratio", ]: - weights = np.abs(query[column].values) + weights = query[column].values weights[np.isnan(weights)] = 0 + + wext = [weights.min(), weights.max()] + weights = weights + abs(weights.min()) kde_weighted = sp.stats.gaussian_kde(cell_coords, weights=weights)(positions).T @@ -82,10 +84,11 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100): )(positions).T Z = np.reshape(kde_weighted / kde_unweighted, X.shape).T + Z = (Z-Z.min())/(Z.max()-Z.min()) * (wext[1] - wext[0]) + wext[0] scores.append(Z) - return (extent, scores[0], scores[1], np.log10(scores[2]), np.ones(Z.shape)) + return (extent, scores[0], scores[1], scores[2], np.ones(Z.shape)) def spatial_detection_score_binned( @@ -257,8 +260,8 @@ def spatial_detection_scores( min_maxes[plot_name][0], extent=extent, cmap="coolwarm_r", - # vmin=min_maxes[plot_name][1][0], - # vmax=min_maxes[plot_name][1][1], + #vmin=min_maxes[plot_name][1][0], + #vmax=min_maxes[plot_name][1][1], ) fig.colorbar(pcm, ax=ax, shrink=0.7) ax.set_title(query_name + "\n" + plot_name) From 758894b88eb071a72d123319b827f49b2c27ae7b Mon Sep 17 00:00:00 2001 From: Jordan Sicherman Date: Wed, 11 Dec 2024 03:07:42 +0000 Subject: [PATCH 06/12] black --- spatial_compare/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py index 7809741..4e64433 100644 --- a/spatial_compare/utils.py +++ b/spatial_compare/utils.py @@ -74,7 +74,7 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100): weights[np.isnan(weights)] = 0 wext = [weights.min(), weights.max()] - + weights = weights + abs(weights.min()) kde_weighted = sp.stats.gaussian_kde(cell_coords, weights=weights)(positions).T @@ -84,7 +84,7 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100): )(positions).T Z = np.reshape(kde_weighted / kde_unweighted, X.shape).T - Z = (Z-Z.min())/(Z.max()-Z.min()) * (wext[1] - wext[0]) + wext[0] + Z = (Z - Z.min()) / (Z.max() - Z.min()) * (wext[1] - wext[0]) + wext[0] scores.append(Z) @@ -260,8 +260,8 @@ def spatial_detection_scores( min_maxes[plot_name][0], extent=extent, cmap="coolwarm_r", - #vmin=min_maxes[plot_name][1][0], - #vmax=min_maxes[plot_name][1][1], + # vmin=min_maxes[plot_name][1][0], + # vmax=min_maxes[plot_name][1][1], ) fig.colorbar(pcm, ax=ax, shrink=0.7) ax.set_title(query_name + "\n" + plot_name) From f2be0cc1f68e3865ac9a32a0229137723e1bdda6 Mon Sep 17 00:00:00 2001 From: Jordan Sicherman Date: Wed, 11 Dec 2024 21:15:19 +0000 Subject: [PATCH 07/12] output mask, and estimator, and evaluate on cels --- spatial_compare/utils.py | 105 +++++++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 42 deletions(-) diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py index 4e64433..021dc0f 100644 --- a/spatial_compare/utils.py +++ b/spatial_compare/utils.py @@ -52,17 +52,19 @@ def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None): def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100): cell_x = query["x_centroid"] cell_y = query["y_centroid"] + cell_coords = np.vstack([cell_x.values, cell_y.values]) xmin, xmax = cell_x.min(), cell_x.max() ymin, ymax = cell_y.min(), cell_y.max() extent = [xmin, xmax, ymin, ymax] - X, Y = np.mgrid[ - xmin : xmax : complex(0, grid_out), ymin : ymax : complex(0, grid_out) - ] - positions = np.vstack([X.ravel(), Y.ravel()]) - - cell_coords = np.vstack([cell_x.values, cell_y.values]) + if grid_out == 0: + positions = cell_coords + else: + X, Y = np.mgrid[ + xmin : xmax : complex(0, grid_out), ymin : ymax : complex(0, grid_out) + ] + positions = np.vstack([X.ravel(), Y.ravel()]) scores = [] for column in [ @@ -77,18 +79,22 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100): weights = weights + abs(weights.min()) - kde_weighted = sp.stats.gaussian_kde(cell_coords, weights=weights)(positions).T - + kde_weighted = sp.stats.gaussian_kde(cell_coords, weights=weights) kde_unweighted = sp.stats.gaussian_kde( cell_coords, - )(positions).T + ) + + if column == "detection_relative_z_score": + estimator = lambda positions: kde_weighted(positions).T / kde_unweighted(positions).T - Z = np.reshape(kde_weighted / kde_unweighted, X.shape).T + Z = kde_weighted(positions).T / kde_unweighted(positions).T Z = (Z - Z.min()) / (Z.max() - Z.min()) * (wext[1] - wext[0]) + wext[0] + if grid_out != 0: + Z = np.reshape(Z, X.shape).T scores.append(Z) - return (extent, scores[0], scores[1], scores[2], np.ones(Z.shape)) + return (estimator, extent, scores[0], scores[1], scores[2]) def spatial_detection_score_binned( @@ -134,7 +140,7 @@ def spatial_detection_score_binned( def spatial_detection_scores( reference: pd.DataFrame, query: pd.DataFrame, - plot_stuff=True, + plot_stuff: bool = True, query_name: str = "query data", comparison_column: str = "transcript_counts", category: str = "supercluster_name", @@ -142,6 +148,7 @@ def spatial_detection_scores( in_place: bool = True, non_spatial: bool = False, use_kde: bool = False, + mask: float = 0.0 ): """ Calculate and plot spatial detection scores for query data compared to reference data. @@ -155,7 +162,7 @@ def spatial_detection_scores( n_bins (int, optional): The number of bins for spatial grouping. Defaults to 50. in_place (bool, optional): Whether to modify the query data in place. Defaults to True. non_spatial (bool, optional): Whether to compare to an ungrouped mean/std. Defaults to False. - use_kde (bool, optional): Whether to use kernel-density estimates instead of taking binned averages. Defaults to False. + use_kde (bool, optional): Whether to use kernel-density estimates instead of taking binned averages. Samples the KDE on a `n_bins` square grid for plotting, unless `n_bins == 0` (in which case it will be sampled at the cell coordinates). Defaults to False. Returns: dict: A dictionary containing the bin image, extent, query data, and reference data (if in_place is False). @@ -220,16 +227,24 @@ def spatial_detection_scores( bin_image_z_score, bin_image_difference, bin_image_ratio, - bin_image_counts, + bin_image_counts ) = spatial_detection_score_binned(s2, n_bins) else: ( + estimator, extent, bin_image_z_score, bin_image_difference, bin_image_ratio, - bin_image_counts, ) = spatial_detection_score_kde(s2, n_bins) + # FIXME: not computing this + bin_image_counts = np.zeros(bin_image_ratio.shape) + + if mask != 0.0: + bin_image_z_score = bin_image_z_score > np.quantile(bin_image_z_score, mask) + bin_image_difference = bin_image_difference > np.quantile(bin_image_difference, mask) + bin_image_ratio = bin_image_ratio > np.quantile(bin_image_ratio, mask) + bin_image_counts = bin_image_counts > np.quantile(bin_image_counts, mask) if plot_stuff: if non_spatial: @@ -256,38 +271,44 @@ def spatial_detection_scores( ) for ii, plot_name in enumerate(min_maxes.keys()): ax = axs[ii] - pcm = ax.imshow( - min_maxes[plot_name][0], - extent=extent, - cmap="coolwarm_r", - # vmin=min_maxes[plot_name][1][0], - # vmax=min_maxes[plot_name][1][1], - ) + + if n_bins != 0: + pcm = ax.imshow( + min_maxes[plot_name][0], + extent=extent, + cmap="coolwarm_r", + # vmin=min_maxes[plot_name][1][0], + # vmax=min_maxes[plot_name][1][1], + ) + else: + pcm = ax.scatter( + s2.x_centroid.values, + -s2.y_centroid.values, + c=min_maxes[plot_name][0], + cmap="coolwarm_r", + ) fig.colorbar(pcm, ax=ax, shrink=0.7) ax.set_title(query_name + "\n" + plot_name) - if in_place: - return dict( - z_score_image=bin_image_z_score, - difference_image=bin_image_difference, - ratio_image=bin_image_ratio, - extent=extent, - count_image=bin_image_counts, - query=True, - reference=True, - ) + ret = dict( + z_score_image=bin_image_z_score, + difference_image=bin_image_difference, + ratio_image=bin_image_ratio, + extent=extent, + count_image=bin_image_counts, + query=True, + reference=True, + ) + if use_kde: + ret["z_score_estimator"] = estimator + + if in_place: + return ret else: - return dict( - z_score_image=bin_image_z_score, - difference_image=bin_image_difference, - ratio_image=bin_image_ratio, - extent=extent, - count_image=bin_image_counts, - query=s2, - reference=s1, - ) - + ret["query"] = s2 + ret["reference"] = s1 + return ret def summarize_and_plot( spatial_density_results, From 2d33963d220a085ce4ea950570e34d10398213c2 Mon Sep 17 00:00:00 2001 From: Jordan Sicherman Date: Wed, 11 Dec 2024 21:16:16 +0000 Subject: [PATCH 08/12] black --- spatial_compare/utils.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py index 021dc0f..19ac490 100644 --- a/spatial_compare/utils.py +++ b/spatial_compare/utils.py @@ -85,7 +85,10 @@ def spatial_detection_score_kde(query: pd.DataFrame, grid_out: int = 100): ) if column == "detection_relative_z_score": - estimator = lambda positions: kde_weighted(positions).T / kde_unweighted(positions).T + estimator = ( + lambda positions: kde_weighted(positions).T + / kde_unweighted(positions).T + ) Z = kde_weighted(positions).T / kde_unweighted(positions).T Z = (Z - Z.min()) / (Z.max() - Z.min()) * (wext[1] - wext[0]) + wext[0] @@ -148,7 +151,7 @@ def spatial_detection_scores( in_place: bool = True, non_spatial: bool = False, use_kde: bool = False, - mask: float = 0.0 + mask: float = 0.0, ): """ Calculate and plot spatial detection scores for query data compared to reference data. @@ -227,7 +230,7 @@ def spatial_detection_scores( bin_image_z_score, bin_image_difference, bin_image_ratio, - bin_image_counts + bin_image_counts, ) = spatial_detection_score_binned(s2, n_bins) else: ( @@ -242,7 +245,9 @@ def spatial_detection_scores( if mask != 0.0: bin_image_z_score = bin_image_z_score > np.quantile(bin_image_z_score, mask) - bin_image_difference = bin_image_difference > np.quantile(bin_image_difference, mask) + bin_image_difference = bin_image_difference > np.quantile( + bin_image_difference, mask + ) bin_image_ratio = bin_image_ratio > np.quantile(bin_image_ratio, mask) bin_image_counts = bin_image_counts > np.quantile(bin_image_counts, mask) @@ -302,7 +307,7 @@ def spatial_detection_scores( if use_kde: ret["z_score_estimator"] = estimator - + if in_place: return ret else: @@ -310,6 +315,7 @@ def spatial_detection_scores( ret["reference"] = s1 return ret + def summarize_and_plot( spatial_density_results, min_cells_per_bin=50, From 83ee279a695296976603c61e42abcd7f389a164b Mon Sep 17 00:00:00 2001 From: Jordan Sicherman Date: Wed, 11 Dec 2024 21:25:21 +0000 Subject: [PATCH 09/12] black/white mask --- spatial_compare/utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py index 19ac490..1752469 100644 --- a/spatial_compare/utils.py +++ b/spatial_compare/utils.py @@ -277,11 +277,16 @@ def spatial_detection_scores( for ii, plot_name in enumerate(min_maxes.keys()): ax = axs[ii] + if mask != 0.0: + cmap = "Greys" + else: + cmap = "woolwarm_r" + if n_bins != 0: pcm = ax.imshow( min_maxes[plot_name][0], extent=extent, - cmap="coolwarm_r", + cmap=cmap, # vmin=min_maxes[plot_name][1][0], # vmax=min_maxes[plot_name][1][1], ) @@ -290,7 +295,7 @@ def spatial_detection_scores( s2.x_centroid.values, -s2.y_centroid.values, c=min_maxes[plot_name][0], - cmap="coolwarm_r", + cmap=cmap, ) fig.colorbar(pcm, ax=ax, shrink=0.7) ax.set_title(query_name + "\n" + plot_name) From ad4396256f2dbb2781d5b16daff7f44cde16272c Mon Sep 17 00:00:00 2001 From: Jordan Sicherman Date: Wed, 11 Dec 2024 21:28:07 +0000 Subject: [PATCH 10/12] fix --- spatial_compare/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py index 1752469..d8eb585 100644 --- a/spatial_compare/utils.py +++ b/spatial_compare/utils.py @@ -280,7 +280,7 @@ def spatial_detection_scores( if mask != 0.0: cmap = "Greys" else: - cmap = "woolwarm_r" + cmap = "coolwarm_r" if n_bins != 0: pcm = ax.imshow( From 6d2daf6ea3af38c6d0cf2e5aa10bc336c31678cc Mon Sep 17 00:00:00 2001 From: Jordan Sicherman Date: Wed, 11 Dec 2024 21:31:37 +0000 Subject: [PATCH 11/12] document --- spatial_compare/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py index d8eb585..7beb6d7 100644 --- a/spatial_compare/utils.py +++ b/spatial_compare/utils.py @@ -166,6 +166,7 @@ def spatial_detection_scores( in_place (bool, optional): Whether to modify the query data in place. Defaults to True. non_spatial (bool, optional): Whether to compare to an ungrouped mean/std. Defaults to False. use_kde (bool, optional): Whether to use kernel-density estimates instead of taking binned averages. Samples the KDE on a `n_bins` square grid for plotting, unless `n_bins == 0` (in which case it will be sampled at the cell coordinates). Defaults to False. + mask (float, optional): A quantile at which to create binary masks from. Defaults to 0.0 (do not binarize). Returns: dict: A dictionary containing the bin image, extent, query data, and reference data (if in_place is False). From ac77c3046b396ef2feb851824c176d98595e47d8 Mon Sep 17 00:00:00 2001 From: Brian Long Date: Thu, 12 Dec 2024 15:34:56 -0800 Subject: [PATCH 12/12] add back fixed z-score range for plotting --- spatial_compare/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spatial_compare/utils.py b/spatial_compare/utils.py index 7beb6d7..1d7aefa 100644 --- a/spatial_compare/utils.py +++ b/spatial_compare/utils.py @@ -258,7 +258,7 @@ def spatial_detection_scores( else: title_string = "Spatial Detection Scores" min_maxes = { - "detection z-score": [bin_image_z_score, [-1, 1]], + "detection z-score": [bin_image_z_score, [-2, 2]], "total counts difference": [bin_image_difference, [-100, 100]], "log10(detection ratio)": [bin_image_ratio, [-1, 1]], } @@ -288,8 +288,8 @@ def spatial_detection_scores( min_maxes[plot_name][0], extent=extent, cmap=cmap, - # vmin=min_maxes[plot_name][1][0], - # vmax=min_maxes[plot_name][1][1], + vmin=min_maxes[plot_name][1][0], + vmax=min_maxes[plot_name][1][1], ) else: pcm = ax.scatter(