diff --git a/analyses/cell-type-ewings/scripts/utils/clustering-functions.R b/analyses/cell-type-ewings/scripts/utils/clustering-functions.R
index 2bc3f34c7..6028bc029 100644
--- a/analyses/cell-type-ewings/scripts/utils/clustering-functions.R
+++ b/analyses/cell-type-ewings/scripts/utils/clustering-functions.R
@@ -10,139 +10,74 @@ source(jaccard_functions)
 validation_functions <- file.path(module_base, "scripts", "utils", "tumor-validation-helpers.R")
 source(validation_functions)
 
-# Perform clustering -----------------------------------------------------------
-
-# get louvain, jaccard clusters for a specified value of k (nearest neighbors)
-get_clusters <- function(pcs, k) {
-  clusters <- bluster::clusterRows(
-    pcs,
-    bluster::NNGraphParam(
-      k = k,
-      type = "jaccard",
-      cluster.fun = "louvain"
-    )
-  )
-
-  return(clusters)
-}
-
-# define a function to perform clustersweep and get clusters across multiple values of k (5,40,5)
-cluster_sweep <- function(sce) {
-  # first perform clustering across parameters
-  cluster_results <- bluster::clusterSweep(reducedDim(sce, "PCA"),
-    bluster::NNGraphParam(),
-    k = as.integer(seq(5, 40, 5)),
-    cluster.fun = "louvain",
-    type = "jaccard"
-  )
-
-  # turn results into a data frame
-  cluster_df <- cluster_results$clusters |>
-    as.data.frame() |>
-    # add barcode column
-    dplyr::mutate(barcodes = colnames(sce)) |>
-    # combine all cluster results into one column
-    tidyr::pivot_longer(
-      cols = ends_with("jaccard"),
-      names_to = "params",
-      values_to = "cluster"
-    ) |>
-    # separate out parameters, nn, function, and type into their own columns
-    dplyr::mutate(
-      nn_param = stringr::word(params, 1, sep = "_") |>
-        stringr::str_replace("k.", "k_"),
-      cluster_fun = stringr::word(params, 2, sep = "_") |>
-        stringr::str_remove("cluster.fun."),
-      cluster_type = stringr::word(params, -1, sep = "_") |>
-        stringr::str_remove("type.")
-    ) |>
-    # remove combined params column
-    dplyr::select(-params)
-
-  return(cluster_df)
-}
-
 # cluster statistics functions -------------------------------------------------
 
 
 # get silhouette width and cluster purity for each cluster
-# calculates values across all nn_param options used to determine clustering
-# all_cluster_results must have nn_param column
+# calculates values across all parameters used to determine clustering
+# all_cluster_results must have cluster_params column
 get_cluster_stats <- function(sce,
                               all_cluster_results) {
   pcs <- reducedDim(sce, "PCA")
 
   # split clustering results by param used
   split_clusters <- all_cluster_results |>
-    split(all_cluster_results$nn_param)
+    split(all_cluster_results$cluster_params)
 
   # for each nn_param get cluster width and purity
   all_stats_df <- split_clusters |>
     purrr::map(\(df){
       sil_df <- bluster::approxSilhouette(pcs, df$cluster) |>
         as.data.frame() |>
-        tibble::rownames_to_column("barcodes")
+        tibble::rownames_to_column("cell_id")
 
       purity_df <- bluster::neighborPurity(pcs, df$cluster) |>
         as.data.frame() |>
-        tibble::rownames_to_column("barcodes")
+        tibble::rownames_to_column("cell_id")
 
       # join into one data frame to return
       stats_df <- sil_df |>
-        dplyr::left_join(purity_df, by = "barcodes")
+        dplyr::left_join(purity_df, by = "cell_id")
 
       return(stats_df)
     }) |>
-    dplyr::bind_rows(.id = "nn_param")
+    dplyr::bind_rows(.id = "cluster_params") |> 
+    dplyr::left_join(all_cluster_results, by = c("cell_id", "cluster_params"))
 
   return(all_stats_df)
 }
 
-# calculate cluster stability for a single set of clusters using ari
-# bootstrap and get ari for clusters compared to sampled clusters
-# re-clusters and gets ari across 20 iterations
-get_ari <- function(pcs,
-                    clusters,
-                    k) {
-  ari <- c()
-  for (iter in 1:20) {
-    # sample cells with replacement
-    sample_cells <- sample(nrow(pcs), nrow(pcs), replace = TRUE)
-    resampled_pca <- pcs[sample_cells, , drop = FALSE]
-
-    # perform clustering on sampled cells
-    resampled_clusters <- get_clusters(resampled_pca, k)
-
-    # calculate ARI between new clustering and original clustering
-    ari[iter] <- pdfCluster::adj.rand.index(resampled_clusters, clusters[sample_cells])
-  }
-
-  ari_df <- data.frame(
-    ari = ari,
-    k_value = k
-  )
-}
-
-# get cluster stability for each nn_param cluster results are available for
+# get cluster stability for each unique combination of params used for clustering
+# must have `cluster_params` column
 get_cluster_stability <- function(sce,
                                   all_cluster_results) {
   pcs <- reducedDim(sce, "PCA")
-
+  
   # split clustering results by param used
   cluster_df_list <- all_cluster_results |>
-    split(all_cluster_results$nn_param)
-
+    split(all_cluster_results$cluster_params)
+  
   # for each parameter, get ari values
   cluster_stability_df <- cluster_df_list |>
-    purrr::imap(\(df, k_value){
-      # make sure k is numeric and remove extra k_
-      k <- stringr::str_remove(k_value, "k_") |>
-        as.numeric()
-
-      get_ari(pcs, df$cluster, k)
+    purrr::map(\(df){
+      
+      # make sure we set objective function to available options
+      objective_function <- dplyr::if_else(!is.na(unique(df$objective_function)),
+                                           unique(df$objective_function),
+                                           "CPM")
+                                           
+      
+      # run stability 
+      rOpenScPCA::calculate_stability(sce,
+                                      cluster_df = df,
+                                      algorithm = unique(df$algorithm),
+                                      nn = unique(df$nn),
+                                      resolution = unique(df$resolution),
+                                      objective_function = objective_function)
+      
     }) |>
-    dplyr::bind_rows()
-
+    dplyr::bind_rows(.id = "cluster_params")
+  
   return(cluster_stability_df)
 }
 
@@ -150,12 +85,41 @@ get_cluster_stability <- function(sce,
 
 # plot individual stats for clusters, either purity or width
 plot_cluster_stats <- function(all_stats_df,
-                               stat_column) {
-  ggplot(all_stats_df, aes(x = nn_param, y = {{ stat_column }})) +
+                               stat_column,
+                               plot_title) {
+  ggplot(all_stats_df, aes(x = nn, y = {{ stat_column }})) +
     # ggforce::geom_sina(size = .2) +
     ggbeeswarm::geom_quasirandom(method = "smiley", size = 0.1) +
+    facet_wrap(vars(resolution),
+               labeller = labeller(resolution = ~ glue::glue("{.}-res"))) +
+    stat_summary(
+      aes(group = nn),
+      color = "red",
+      # median and quartiles for point range
+      fun = "median",
+      fun.min = function(x) {
+        quantile(x, 0.25)
+      },
+      fun.max = function(x) {
+        quantile(x, 0.75)
+      }
+    ) +
+    labs(
+      title = plot_title
+    )
+}
+
+# plot cluster stability 
+plot_cluster_stability <- function(stat_df,
+                                   plot_title){
+  
+  ggplot(stability_df, aes(x = nn, y = ari)) +
+    geom_jitter(width = 0.1) +
+    facet_wrap(vars(resolution),
+               labeller = labeller(resolution = ~ glue::glue("{.}-res"))) +
+    labs(title = "Cluster stability") +
     stat_summary(
-      aes(group = nn_param),
+      aes(group = nn),
       color = "red",
       # median and quartiles for point range
       fun = "median",
@@ -165,7 +129,11 @@ plot_cluster_stats <- function(all_stats_df,
       fun.max = function(x) {
         quantile(x, 0.75)
       }
+    ) +
+    labs(
+      title = plot_title
     )
+  
 }
 
 
diff --git a/analyses/cell-type-ewings/template_notebooks/auc-singler-workflow/02-singler-results.Rmd b/analyses/cell-type-ewings/template_notebooks/auc-singler-workflow/02-singler-results.Rmd
index fd1964a19..91d8c0a73 100644
--- a/analyses/cell-type-ewings/template_notebooks/auc-singler-workflow/02-singler-results.Rmd
+++ b/analyses/cell-type-ewings/template_notebooks/auc-singler-workflow/02-singler-results.Rmd
@@ -209,6 +209,30 @@ The annotations are shown below the heatmap.
 - Density plot showing gene expression or gene set scores across all cells. 
 Each row is a cell type and the expression or score is plotted on the x-axis. 
 
+```{r}
+# check that marker genes are expressed, otherwise turn off those plots
+total_exp <- sum(classification_df[marker_gene_columns])
+if(total_exp > 0){
+  show_marker_gene_plots <- TRUE
+} else {
+  show_marker_gene_plots <- FALSE
+  message("No expression of marker genes in this library. No plots will be displayed in sections labeled 'Marker gene expression'.")
+}
+
+```
+
+```{r}
+# check that gene set scores aren't all 0, otherwise turn off those plots
+total_score <- sum(classification_df[geneset_columns])
+if(total_score > 0){
+  show_gene_set_plots <- TRUE
+} else {
+  show_gene_set_plots <- FALSE
+  message("Genes present in provided gene sets are not expressed in this library. No plots will be displayed in sections labeled 'Gene set scores'.")
+}
+
+```
+
 
 ### Tumor vs. Normal 
 
@@ -216,12 +240,13 @@ In this section we show just the cells that are considered tumor and normal, lum
 
 **Marker gene expression**
 
-```{r}
-full_celltype_heatmap(classification_df, marker_gene_columns, "singler_tumor_normal")
+
+```{r, eval=show_marker_gene_plots}
+full_celltype_heatmap(classification_df, marker_gene_columns, "singler_tumor_normal") 
 ```
 
 
-```{r}
+```{r, eval=show_marker_gene_plots}
 plot_density(
   classification_df,
   "tumor_sum",
@@ -231,11 +256,11 @@ plot_density(
 
 **Gene set scores**
 
-```{r}
+```{r, eval=show_gene_set_plots}
 full_celltype_heatmap(classification_df, geneset_columns, "singler_tumor_normal")
 ```
 
-```{r, fig.height=10}
+```{r, fig.height=10, eval=show_gene_set_plots}
 geneset_columns |>
   purrr::map(\(column){
     plot_density(
@@ -254,11 +279,11 @@ In this section we show all tumor cells and the top 5 most represented normal ce
 
 **Marker gene expression** 
 
-```{r}
+```{r, eval=show_marker_gene_plots}
 full_celltype_heatmap(classification_df, marker_gene_columns, "singler_lumped")
 ```
 
-```{r, fig.height=10}
+```{r, fig.height=10, eval=show_marker_gene_plots}
 marker_gene_columns |>
   purrr::map(\(column){
     plot_density(
@@ -273,11 +298,11 @@ marker_gene_columns |>
 
 **Gene set scores**
 
-```{r}
+```{r, eval=show_gene_set_plots}
 full_celltype_heatmap(classification_df, geneset_columns, "singler_lumped")
 ```
 
-```{r, fig.height=10}
+```{r, fig.height=10, eval=show_gene_set_plots}
 geneset_columns |>
   purrr::map(\(column){
     plot_density(
@@ -295,12 +320,12 @@ Here we compare the marker gene expression and gene set scores for cells annotat
 
 **Marker gene expression**
 
-```{r}
+```{r, eval=show_marker_gene_plots}
 full_celltype_heatmap(classification_df, marker_gene_columns, "consensus")
 ```
 
 
-```{r}
+```{r, eval=show_marker_gene_plots}
 plot_density(
   classification_df,
   "tumor_sum",
@@ -311,11 +336,11 @@ plot_density(
 
 **Gene set scores**
 
-```{r}
+```{r, eval=show_gene_set_plots}
 full_celltype_heatmap(classification_df, geneset_columns, "consensus")
 ```
 
-```{r, fig.height=10}
+```{r, fig.height=10, eval=show_gene_set_plots}
 geneset_columns |>
   purrr::map(\(column){
     plot_density(
diff --git a/analyses/cell-type-ewings/template_notebooks/clustering-workflow/01-clustering-metrics.Rmd b/analyses/cell-type-ewings/template_notebooks/clustering-workflow/01-clustering-metrics.Rmd
new file mode 100644
index 000000000..b1db177bd
--- /dev/null
+++ b/analyses/cell-type-ewings/template_notebooks/clustering-workflow/01-clustering-metrics.Rmd
@@ -0,0 +1,267 @@
+---
+title: "Clustering exploration for `r params$library_id`"
+author: Ally Hawkins
+date: "`r Sys.Date()`"
+output:
+  html_document:
+    toc: true
+    toc_depth: 3
+    code_folding: "hide"
+params:
+  library_id: "SCPCL000822"
+  sce_file: "../../../../data/current/SCPCP000015/SCPCS000490/SCPCL000822_processed.rds"
+  cluster_results_file: "SCPCL000822_clusters.tsv"
+  nn_range: !r seq(5, 40, 5)
+  louvain_res_range: !r c(0.5, 1, 1.5)
+  mod_res_range: !r c(0.5, 1, 1.5)
+  cpm_res_range: !r c(0.001, 0.005, 0.01)
+---
+
+This notebook summarizes clustering metrics for `r params$library_id`.
+
+Louvain clusters are calculated using the following range of parameters: 
+
+- Nearest neighbors: `r paste0(params$nn_range, collapse = ", ")`
+- Resolution:`r paste0(params$louvain_res_range, collapse = ", ")`
+
+Leiden clusters are calculated using modularity as the objective function with the following range of parameters: 
+
+- Objective function: modularity 
+- Nearest neighbors: `r paste0(params$nn_range, collapse = ", ")`
+- Resolution: `r paste0(params$mod_res_range, collapse = ", ")`
+
+Leiden clusters are calculated using CPM as the objective function with the following range of parameters: 
+
+- Objective function: CPM 
+- Nearest neighbors: `r paste0(params$nn_range, collapse = ", ")`
+- Resolution: `r paste0(params$cpm_res_range, collapse = ", ")`
+
+- Metrics are then calculated to evaluate clustering results across all parameters: 
+  - Silhouette width 
+  - Cluster purity 
+  - Cluster stability 
+
+## Setup
+
+```{r}
+# check that sce file exists
+stopifnot(
+  "sce file does not exist" = file.exists(params$sce_file)
+)
+```
+
+
+```{r packages}
+suppressPackageStartupMessages({
+  # load required packages
+  library(SingleCellExperiment)
+  library(ggplot2)
+})
+
+# Set default ggplot theme
+theme_set(
+  theme_classic()
+)
+
+# set seed
+set.seed(2024)
+```
+
+
+```{r base paths}
+# The path to this module
+module_base <- rprojroot::find_root(rprojroot::is_renv_project)
+
+# source in clustering functions
+clustering_functions <- file.path(module_base, "scripts", "utils", "clustering-functions.R")
+source(clustering_functions)
+```
+
+
+```{r}
+# read in input sce files
+sce <- readr::read_rds(params$sce_file)
+```
+
+
+## Calculate clusters
+
+
+```{r}
+# louvain clustering across all parameters 
+louvain_results <- rOpenScPCA::sweep_clusters(
+  sce,
+  algorithm = "louvain",
+  nn = params$nn_range,
+  resolution = params$louvain_res_range
+) |> 
+  dplyr::bind_rows() |> 
+  # add in objective function so we can combine with leiden results
+  dplyr::mutate(objective_function = NA_character_)
+
+# leiden cpm clustering
+leiden_cpm_results <- rOpenScPCA::sweep_clusters(
+  sce,
+  algorithm = "leiden",
+  nn = params$nn_range,
+  resolution = params$cpm_res_range,
+  objective_function = "CPM"
+) |> 
+  dplyr::bind_rows()
+
+# leiden modularity clustering
+leiden_mod_results <- rOpenScPCA::sweep_clusters(
+  sce,
+  algorithm = "leiden",
+  nn = params$nn_range,
+  resolution = params$mod_res_range,
+  objective_function = "modularity"
+) |> 
+  dplyr::bind_rows()
+```
+
+
+```{r}
+# join all clustering results 
+all_cluster_results <- dplyr::bind_rows(list(louvain_results, leiden_cpm_results, leiden_mod_results)) |> 
+  # add column for method + objective function 
+  # use this for splitting plots throughout report
+  dplyr::mutate(cluster_method = glue::glue(
+    "{algorithm}-{objective_function}"
+  ) |> 
+    stringr::str_remove("-NA")
+  )
+```
+
+
+## Visualize clusters with UMAP
+
+```{r}
+# get umap embeddings and combine into a data frame with cluster assignments
+umap_df <- sce |>
+  scuttle::makePerCellDF(use.dimred = "UMAP") |>
+  # replace UMAP.1 with UMAP1 and get rid of excess columns
+  dplyr::select("cell_id" = barcodes, UMAP1 = UMAP.1, UMAP2 = UMAP.2) |>
+  dplyr::left_join(all_cluster_results, by = "cell_id")
+
+split_umap_df <- umap_df |> 
+  # add a column that shows the combination of nn/res so we get stats for each unique combo of params 
+  dplyr::mutate(cluster_params = glue::glue("{nn}nn-{resolution}res")) |> 
+  split(umap_df$cluster_method)
+```
+
+
+Below we visualize the cluster assignments using each parameter on a UMAP. 
+This can be helpful to see if any parameters lead to any obvious over or under clustering. 
+
+```{r, fig.height=15, fig.width=10}
+# look at clustering results for each library across all params
+split_umap_df |> 
+  purrr::imap(\(df, method){
+    
+    ggplot(df, aes(x = UMAP1, y = UMAP2, color = cluster)) +
+      geom_point(alpha = 0.5, size = 0.1) +
+      facet_grid(rows = vars(nn),
+                 cols = vars(resolution),
+                 labeller = labeller(nn = ~ glue::glue("{.}-nn"),
+                                     resolution = ~ glue::glue("{.}-res"))) +
+      theme(
+        aspect.ratio = 1,
+        legend.position = "none", 
+        panel.border = element_rect(color = "black", fill = NA)
+      ) +
+      labs(title = method)
+    
+  })
+```
+
+
+## Clustering statistics 
+
+Below we calculate a series of statistics: 
+
+- Average silhouette width: This metric evaluates cluster separation. 
+Values range from -1 to 1.
+Cells with large positive silhouette widths are closer to other cells in the same cluster than to cells in different clusters. 
+Higher values indicate tighter clusters.
+- Average cluster purity: This metric also evaluates cluster separation and tells us the proportion of neighboring cells that are assigned to the same cluster. 
+Purity values range from 0-1 with higher purity values indicating clusters that are well separated. 
+- Cluster stability: This evaluates how stable the clustering is to input data. 
+Stability values range from 0-1 with higher values of cluster stability indicating more reproducible clusters. 
+
+```{r, warning=FALSE}
+# get a combined stats dataframe with purity and width for all clusters
+all_stats <- split_umap_df |> 
+  purrr::map(\(df) {get_cluster_stats(sce,df)})
+```
+
+### Silhouette width
+
+```{r}
+# silhouette width for different params
+all_stats |> 
+  purrr::imap(\(df, cluster_method){plot_cluster_stats(df, width, cluster_method)})
+```
+
+### Cluster purity 
+
+```{r}
+# cluster purity for different params
+all_stats |> 
+  purrr::imap(\(df, cluster_method){plot_cluster_stats(df, purity, cluster_method)})
+```
+
+### Cluster stability 
+
+```{r, warning=FALSE, message=FALSE}
+# calculate cluster stability
+stability_stats <- split_umap_df |> 
+  purrr::map(\(df){ get_cluster_stability(sce, df)})
+```
+
+
+```{r, warning=FALSE, message=FALSE}
+# plot stability across all parameters
+stability_stats |> 
+  purrr::imap(\(df, cluster_method){
+    
+    
+    # plot stability across all values of k
+    ggplot(df, aes(x = nn, y = ari)) +
+      geom_jitter(width = 0.1) +
+      facet_wrap(vars(resolution),
+                 labeller = labeller(resolution = ~ glue::glue("{.}-res"))) +
+      labs(title = "Cluster stability") +
+      stat_summary(
+        aes(group = nn),
+        color = "red",
+        # median and quartiles for point range
+        fun = "median",
+        fun.min = function(x) {
+          quantile(x, 0.25)
+        },
+        fun.max = function(x) {
+          quantile(x, 0.75)
+        }
+      )
+    
+    
+  })
+```
+
+## Export clusters 
+
+```{r}
+# export all clustering results
+readr::write_tsv(all_cluster_results, params$cluster_results_file)
+```
+
+
+## Session info 
+
+```{r session info}
+# record the versions of the packages used in this analysis and other environment information
+sessionInfo()
+```
+
+
diff --git a/analyses/cell-type-ewings/template_notebooks/clustering-workflow/01-clustering.Rmd b/analyses/cell-type-ewings/template_notebooks/clustering-workflow/02-clustering-celltypes.Rmd
similarity index 53%
rename from analyses/cell-type-ewings/template_notebooks/clustering-workflow/01-clustering.Rmd
rename to analyses/cell-type-ewings/template_notebooks/clustering-workflow/02-clustering-celltypes.Rmd
index 69104fd83..0849ca334 100644
--- a/analyses/cell-type-ewings/template_notebooks/clustering-workflow/01-clustering.Rmd
+++ b/analyses/cell-type-ewings/template_notebooks/clustering-workflow/02-clustering-celltypes.Rmd
@@ -1,5 +1,5 @@
 ---
-title: "Clustering exploration for `r params$library_id`"
+title: "Comparison of clusters to cell types for `r params$library_id`"
 author: Ally Hawkins
 date: "`r Sys.Date()`"
 output:
@@ -8,20 +8,15 @@ output:
     toc_depth: 3
     code_folding: "hide"
 params:
-  library_id: NULL
-  sce_file: NULL
-  singler_results_file: NULL
-  cluster_results_file: NULL
-  marker_genes_file: NULL
+  library_id: "SCPCL000822"
+  sce_file: "../../../../data/current/SCPCP000015/SCPCS000490/SCPCL000822_processed.rds"
+  singler_results_file: "../../results/aucell_singler_annotation/SCPCS000490/SCPCL000822_singler-classifications.tsv"
+  cluster_results_file: "../../results/cluster_exploration/SCPCS000490/SCPCL000822_clusters.tsv"
+  marker_genes_file: "../../references/visser-all-marker-genes.tsv"
 ---
 
-This notebook summarizes clustering for `r params$library_id`.
+TODO: Update this notebook to take as input a clustering algorithm, nn (range of nn?), resolution, and optional objective function to display in SingleR and marker gene plots
 
-- Louvain, Jaccard clusters are calculated using a range of parameters specifying nearest neighbors (5, 10, 15, 20, 25, 30, 35, and 40). 
-- Metrics are then calculated to evaluate clustering results across all parameters: 
-  - Silhouette width 
-  - Cluster purity 
-  - Cluster stability 
 - The clusters are then compared to the results from running `SingleR` in the `aucell-singler-annotation.sh` workflow.
 - We then look at marker gene expression across the clusters and assign each cluster to a cell type. 
 
@@ -52,6 +47,9 @@ theme_set(
 # quiet messages
 options(readr.show_col_types = FALSE)
 ComplexHeatmap::ht_opt(message = FALSE)
+
+# set seed
+set.seed(2024)
 ```
 
 
@@ -74,112 +72,12 @@ source(clustering_functions)
 sce <- readr::read_rds(params$sce_file)
 
 # read in singler results
-singler_classification_df <- readr::read_tsv(params$singler_results_file)
-```
-
-## Clustering
-
-Below we perform Louvain, Jaccard clustering, varying `k`. 
-The minimum `k` is 5 and the maximum `k` is 40 with a step size of 5. 
-
-```{r}
-# perform clustering across k = 5-40 with increments of 5
-all_cluster_results <- cluster_sweep(sce) |>
-  dplyr::mutate(
-    nn_param = forcats::fct_relevel(nn_param, "k_5", after = 0)
-  )
-```
-
-```{r}
-# get umap embeddings and combine into a data frame with cluster assignments
-umap_df <- sce |>
-  scuttle::makePerCellDF(use.dimred = "UMAP") |>
-  # replace UMAP.1 with UMAP1 and get rid of excess columns
-  dplyr::select(barcodes, UMAP1 = UMAP.1, UMAP2 = UMAP.2) |>
-  dplyr::left_join(all_cluster_results, by = "barcodes")
+singler_classification_df <- readr::read_tsv(params$singler_results_file) |>
+  dplyr::rename("cell_id" = "barcodes")
 ```
 
 
-Below we visualize the cluster assignments using each parameter on a UMAP. 
-This can be helpful to see any values of `k` that may have obvious over or under clustering. 
-
-```{r, fig.height=15, fig.width=10}
-# look at clustering results for each library across all params
-ggplot(umap_df, aes(x = UMAP1, y = UMAP2, color = cluster)) +
-  geom_point(alpha = 0.5, size = 0.1) +
-  facet_wrap(vars(nn_param)) +
-  theme(
-    aspect.ratio = 1,
-    legend.position = "none"
-  )
-```
-
-## Clustering statistics 
-
-Below we calculate a series of statistics: 
-
-- Average silhouette width: This metric evaluates cluster separation. 
-Cells with large positive silhouette widths are closer to other cells in the same cluster than to cells in different clusters. 
-Higher values indicate tighter clusters.
-- Average cluster purity: This metric also evaluates cluster separation and tells us the proportion of neighboring cells that are assigned to the same cluster. 
-Purity values range from 0-1 with higher purity values indicating clusters that are well separated. 
-- Cluster stability: This evaluates how stable the clustering is to input data. 
-Stability values range from 01- with higher values of cluster stability indicating more reproducible clusters. 
-
-```{r, warning=FALSE}
-# get a combined stats dataframe with purity and width for all clusters
-all_stats_df <- get_cluster_stats(sce, all_cluster_results) |>
-  dplyr::mutate(
-    # make sure the order is correct
-    nn_param = forcats::fct_relevel(nn_param, "k_5", after = 0)
-  )
-```
-
-### Silhouette width
-
-```{r}
-# silhouette width for different params
-plot_cluster_stats(all_stats_df, width)
-```
-
-### Cluster purity 
-
-```{r}
-# cluster purity for different params
-plot_cluster_stats(all_stats_df, purity)
-```
-
-### Cluster stability 
-
-```{r, warning=FALSE, message=FALSE}
-# calculate cluster stability
-stability_df <- get_cluster_stability(sce, all_cluster_results) |>
-  dplyr::mutate(
-    k_value = as.factor(k_value),
-    # make sure that k = 5 comes first
-    k_value = forcats::fct_relevel(k_value, "5", after = 0)
-  )
-
-# plot stability across all values of k
-ggplot(stability_df, aes(x = k_value, y = ari)) +
-  geom_jitter(width = 0.1) +
-  labs(title = "Cluster stability") +
-  stat_summary(
-    aes(group = k_value),
-    color = "red",
-    # median and quartiles for point range
-    fun = "median",
-    fun.min = function(x) {
-      quantile(x, 0.25)
-    },
-    fun.max = function(x) {
-      quantile(x, 0.75)
-    }
-  )
-```
-
-
-## Compare clusters to cell types from `SingleR`
+## Clusters vs. `SingleR` {.tabset}
 
 Now we will compare the clustering results to the cell type assignments obtained from `SingleR` in the `aucell-singler-annotation.sh` workflow. 
 To compare results we will calculate the Jaccard similarity index between clusters and cell types. 
@@ -190,7 +88,7 @@ For the plots, we will only display the top 7 cell types identified by `SingleR`
 ```{r}
 cluster_classification_df <- all_cluster_results |>
   # add in classifications from singler
-  dplyr::left_join(singler_classification_df, by = "barcodes") |>
+  dplyr::left_join(singler_classification_df, by = "cell_id") |>
   dplyr::mutate(
     # first grab anything that is tumor and label it tumor
     # NA should be unknown
@@ -204,9 +102,13 @@ cluster_classification_df <- all_cluster_results |>
     singler_lumped = singler_annotation |>
       forcats::fct_lump_n(7, other_level = "All remaining cell types", ties.method = "first") |>
       forcats::fct_infreq() |>
-      forcats::fct_relevel("All remaining cell types", after = Inf),
-    nn_param = forcats::fct_relevel(nn_param, "k_5", after = 0)
+      forcats::fct_relevel("All remaining cell types", after = Inf)
   )
+
+cluster_classification_df |> 
+  split(cluster_classification_df$resolution) |> 
+  purrr::map(\(df){cluster_celltype_heatmap(df)}) 
+
 ```
 
 ```{r, fig.height=10, fig.width=7}
@@ -215,7 +117,7 @@ cluster_celltype_heatmap(cluster_classification_df)
 ```
 
 
-## Marker gene expression across clusters 
+## Marker gene expression {.tabset} 
 
 Finally, we will look at the expression of marker genes across each cluster for each value of `k`. 
 In these plots, each row shows the distribution of the specified marker genes in that cluster. 
@@ -256,11 +158,10 @@ k_params |>
   })
 ```
 
+
 ## Session info 
 
 ```{r session info}
 # record the versions of the packages used in this analysis and other environment information
 sessionInfo()
 ```
-
-
diff --git a/analyses/cell-type-ewings/template_notebooks/cnv-workflow/04-infercnv.Rmd b/analyses/cell-type-ewings/template_notebooks/cnv-workflow/04-infercnv.Rmd
index a7f44a39e..75634c0e5 100644
--- a/analyses/cell-type-ewings/template_notebooks/cnv-workflow/04-infercnv.Rmd
+++ b/analyses/cell-type-ewings/template_notebooks/cnv-workflow/04-infercnv.Rmd
@@ -157,7 +157,11 @@ coldata_df <- sce |>
   dplyr::left_join(cnv_metadata_df, by = c("barcodes"))
 
 # remove MT and GL chr, comments in docs recommend that these are not useful and can be inaccurate
-chr_columns_to_keep <- which(!stringr::str_detect(names(coldata_df), "chrMT|chrGL"))
+# only keep coldata columns and chr1-22 columns
+chr_columns <- paste0("chr", seq(1,22), collapse = "|")
+all_colnames_keep <- paste0(c(names(colData(sce)), "UMAP1", "UMAP2|"), collapse = "|") |> 
+  paste0(chr_columns, collapse = "|")
+chr_columns_to_keep <- which(stringr::str_detect(names(coldata_df), all_colnames_keep))
 all_cnv_df <- coldata_df[, chr_columns_to_keep]
 ```