Added additional parameters for clustering parameters and batch corre…

…ction; split the clustering function into initial and final clustering functions; updated examples in all functions.
SydneyBioX · Oct 30, 2024 · 738dfd6 · 738dfd6
1 parent 374914b
commit 738dfd6
Show file tree

Hide file tree

Showing 17 changed files with 458 additions and 251 deletions.
diff --git a/R/adaptiveSmoothing.R b/R/adaptiveSmoothing.R
@@ -37,8 +37,8 @@
 #'
 #' # requires matrix containing NN nearest neighbour cell labels (nnCells),
 #' # generated using the neighbourDetect() function
-#' spe <- adaptiveSmoothing(spe, nnCells, NN = 30, kernel = "G", spread = 0.05,
-#'                          threads = 1)
+#' spe <- clustSIGNAL::adaptiveSmoothing(spe, nnCells, NN = 30, kernel = "G",
+#'                                       spread = 0.05, threads = 1)
 #' spe
 #'
 #' @export

diff --git a/R/clustSIGNAL.R b/R/clustSIGNAL.R
@@ -15,6 +15,8 @@
 #' Default value is 'None'.
 #' @param batch a logical parameter for whether or not to perform batch
 #' correction. Default value is FALSE.
+#' @param batch_by a character indicating name of colData(spe) column containing
+#' the batch names.
 #' @param NN an integer for the number of neighbourhood cells the function
 #' should consider. The value must be greater than or equal to 1. Default value
 #' is 30.
@@ -29,25 +31,25 @@
 #' after region description. Default value is TRUE.
 #' @param threads a numeric value for the number of CPU cores to be used for the
 #' analysis. Default value set to 1.
+#' @param clustParams a list of parameters for TwoStepParam clustering methods.
+#' The clustering parameters are in the order - centers (centers) for clustering
+#' with KmeansParam, centers (centers) for sub-clustering clusters with
+#' KmeansParam, maximum iterations (iter.max) for clustering with KmeansParam,
+#' k values (k) for clustering with NNGraphParam, and community detection method
+#' (cluster.fun) to use with NNGraphParam.
 #' @param outputs a character for the type of output to return to the user. "c"
-#' for data frame of cell IDs and their respective cluster numbers (default), "n"
-#' for list of dataframe of clusters plus neighbourhood matrix, "s" for list of
-#' dataframe of clusters plus final spatialExperiment object, or "a" for list of
-#' all outputs.
-#' @param ... additional parameters for TwoStepParam clustering methods. Include
-#' parameters like k for number of nearest neighbours and cluster.fun for
-#' selecting community detection method.
+#' for data frame of cell IDs and their respective cluster numbers (default)
+#' and "a" for list of dataframe of clusters plus final SpatialExperiment
+#' object.
 #'
 #' @return a list of outputs
 #'
 #' 1. clusters: a data frame of cell names and their cluster classification.
 #'
-#' 2. neighbours: a matrix of cell names and the names of their NN nearest
-#' neighbour cells.
-#'
-#' 3. spe_final: a SpatialExperiment object with initial 'putative cell type'
+#' 2. spe_final: a SpatialExperiment object with initial 'putative cell type'
 #' groups, entropy values, smoothed gene expression, post-smoothing clusters,
 #' and silhouette widths included.
+#'
 #' @importFrom SpatialExperiment spatialCoords
 #' @importFrom SingleCellExperiment reducedDimNames
 #' @importFrom methods show
@@ -66,8 +68,11 @@
 #' @export
 
 clustSIGNAL <- function (spe, samples, cells, dimRed = "None", batch = FALSE,
-                         NN = 30, kernel = "G", spread = 0.05, sort = TRUE,
-                         threads = 1, outputs = "c", ...) {
+                         batch_by = "None", NN = 30, kernel = "G",
+                         spread = 0.05, sort = TRUE, threads = 1, outputs = "c",
+                         clustParams = list(clust_c = 0, subclust_c = 0,
+                                            iter.max = 30, k = 5,
+                                            cluster.fun = "louvain")) {
     time_start <- Sys.time()
     # data and parameter checks
     if (is.null(spatialCoords(spe)) == TRUE){
@@ -95,22 +100,16 @@ clustSIGNAL <- function (spe, samples, cells, dimRed = "None", batch = FALSE,
 
     # Non-spatial clustering to identify initial cluster groups
     # reclust should always be FALSE here
-    spe <- nsClustering(spe = spe, samples = samples, dimRed = dimRed,
-                        batch = batch, reclust = FALSE, ...)
+    spe <- p1_clustering(spe, dimRed, batch, batch_by, clustParams)
     # Neighborhood detection, and sorting if sort = TRUE
-    outReg <- neighbourDetect(spe = spe, samples = samples, NN = NN,
-                              cells = cells, sort = sort)
+    outReg <- neighbourDetect(spe, samples, NN, cells, sort)
     # Calculating domainness of cell neighborhoods
-    spe <- entropyMeasure(spe = spe, cells = cells,
-                          regXclust = outReg$regXclust, threads = threads)
+    spe <- entropyMeasure(spe, cells, outReg$regXclust, threads)
     # Weighted smoothing guided by neighbourhood entropy
-    spe <- adaptiveSmoothing(spe = spe, nnCells = outReg$nnCells, NN = NN,
-                             kernel = kernel, spread = spread,
-                             threads = threads)
+    spe <- adaptiveSmoothing(spe, outReg$nnCells, NN, kernel, spread, threads)
     # Non-spatial clustering of adaptively smoothed expression
     # reclust should always be TRUE here
-    spe <- nsClustering(spe = spe, samples = samples, batch = batch,
-                        reclust = TRUE, ...)
+    spe <- p2_clustering(spe, batch, batch_by, clustParams)
 
     cluster_df <- data.frame("Cells" = spe[[cells]],
                              "Clusters" = spe$clustSIGNAL)
@@ -119,14 +118,8 @@ clustSIGNAL <- function (spe, samples, cells, dimRed = "None", batch = FALSE,
     show(time_end - time_start)
     if (outputs == "c"){
         return (list("clusters" = cluster_df))
-    } else if (outputs == "n"){
-        return (list("clusters" = cluster_df,
-                     "neighbours" = outReg$nnCells))
-    } else if (outputs == "s") {
-        return (list("clusters" = cluster_df,
-                     "spe_final" = spe))
     } else if (outputs == "a") {
         return (list("clusters" = cluster_df,
-                     "neighbours" = outReg$nnCells,
-                     "spe_final" = spe))}
+                     "spe_final" = spe))
+    }
 }
diff --git a/R/clustering.R b/R/clustering.R
diff --git a/R/entropyMeasure.R b/R/entropyMeasure.R
@@ -17,6 +17,7 @@
 #'
 #' @return SpatialExperiment object including entropy values for each cell
 #' neighbourhood.
+#'
 #' @importFrom BiocParallel bplapply
 #' @importFrom methods show
 #'
@@ -25,7 +26,7 @@
 #'
 #' # requires list containing cluster proportions of each region (regXclust),
 #' # generated using the neighbourDetect() function
-#' spe <- entropyMeasure(spe, cells = "uniqueID", regXclust, threads = 1)
+#' spe <- clustSIGNAL::entropyMeasure(spe, cells = "uniqueID", regXclust, threads = 1)
 #' head(spe$entropy)
 #'
 #' @export

diff --git a/R/finalClustering.R b/R/finalClustering.R
@@ -0,0 +1,75 @@
+#' Final non-spatial clustering
+#'
+#' @description
+#' A function containing to cluster adaptively smoothed gene expression data.
+#'
+#' @param spe SpatialExperiment object. For reclust = FALSE, the object should
+#' contain logcounts and PCA, but for reculst = TRUE, the object should contain
+#' smoothed gene expression.
+#' @param batch a logical parameter for whether or not to perform batch
+#' correction. Default value is FALSE.
+#' @param batch_by a character indicating name of colData(spe) column containing
+#' the batch names.
+#' @param clustParams a list of parameters for TwoStepParam clustering methods.
+#' The clustering parameters are in the order - centers (centers) for clustering
+#' with KmeansParam, centers (centers) for sub-clustering clusters with
+#' KmeansParam, maximum iterations (iter.max) for clustering with KmeansParam,
+#' k values (k) for clustering with NNGraphParam, and community detection method
+#' (cluster.fun) to use with NNGraphParam.
+#'
+#' @return SpatialExperiment object containing clusters generated from smoothed
+#' data.
+#'
+#' @importFrom bluster clusterRows TwoStepParam KmeansParam NNGraphParam
+#' @importFrom scater runPCA
+#' @importFrom harmony RunHarmony
+#' @importFrom SummarizedExperiment colData
+#' @importFrom SingleCellExperiment reducedDim
+#' @importFrom methods show
+#' @importFrom stats setNames
+#'
+#' @examples
+#' data(example)
+#'
+#' # For non-spatial clustering of normalised counts
+#' spe <- clustSIGNAL::p2_clustering(spe, batch = FALSE, batch_by = "None",
+#'                                  clustParams = list(0, 0, 30, 5, "louvain"))
+#' head(spe$clustSIGNAL)
+#'
+#' @export
+
+#### Non-spatial clustering
+p2_clustering <- function(spe, batch, batch_by, clustParams) {
+    if (clustParams[[1]] == 0){
+        # number of centers = 1/5th of total cells in sample
+        clustVal <- min(as.integer(ncol(spe) / 5), 5000)
+    } else {
+        clustVal <- clustParams[[1]]
+    }
+    # Clustering adaptively smoothed data
+    spe <- scater::runPCA(spe, assay.type = "smoothed", name = "PCA.smooth")
+    if (batch == TRUE) {
+        emb <- harmony::RunHarmony(data_mat = reducedDim(spe, "PCA.smooth"),
+                                   meta_data = colData(spe),
+                                   vars_use = batch_by, max.iter = 20,
+                                   verbose = FALSE)
+        mat <- emb
+    } else {
+        mat <- reducedDim(spe, "PCA.smooth")}
+    # reClust <- bluster::clusterRows(mat,
+    #                                 bluster::TwoStepParam(
+    #                                     first = bluster::KmeansParam(centers = clustVal, iter.max = 30),
+    #                                     second = bluster::NNGraphParam(k = 5, cluster.fun = "louvain")))
+    reClust <- bluster::clusterRows(
+        mat,
+        bluster::TwoStepParam(
+            first = bluster::KmeansParam(centers = clustVal,
+                                         iter.max = clustParams[[3]]),
+            second = bluster::NNGraphParam(k = clustParams[[4]],
+                                           cluster.fun = clustParams[[5]])))
+    spe$clustSIGNAL <- factor(reClust)
+    show(paste("Nonspatial clustering performed on smoothed data. Clusters =",
+               length(unique(reClust)), "Time",
+               format(Sys.time(),'%H:%M:%S')))
+    return (spe)
+}