Skip to content

Commit

Permalink
Added additional parameters for clustering parameters and batch corre…
Browse files Browse the repository at this point in the history
…ction; split the clustering function into initial and final clustering functions; updated examples in all functions.
  • Loading branch information
PratibhaPanwar committed Oct 30, 2024
1 parent 374914b commit 738dfd6
Show file tree
Hide file tree
Showing 17 changed files with 458 additions and 251 deletions.
4 changes: 2 additions & 2 deletions R/adaptiveSmoothing.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
#'
#' # requires matrix containing NN nearest neighbour cell labels (nnCells),
#' # generated using the neighbourDetect() function
#' spe <- adaptiveSmoothing(spe, nnCells, NN = 30, kernel = "G", spread = 0.05,
#' threads = 1)
#' spe <- clustSIGNAL::adaptiveSmoothing(spe, nnCells, NN = 30, kernel = "G",
#' spread = 0.05, threads = 1)
#' spe
#'
#' @export
Expand Down
57 changes: 25 additions & 32 deletions R/clustSIGNAL.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#' Default value is 'None'.
#' @param batch a logical parameter for whether or not to perform batch
#' correction. Default value is FALSE.
#' @param batch_by a character indicating name of colData(spe) column containing
#' the batch names.
#' @param NN an integer for the number of neighbourhood cells the function
#' should consider. The value must be greater than or equal to 1. Default value
#' is 30.
Expand All @@ -29,25 +31,25 @@
#' after region description. Default value is TRUE.
#' @param threads a numeric value for the number of CPU cores to be used for the
#' analysis. Default value set to 1.
#' @param clustParams a list of parameters for TwoStepParam clustering methods.
#' The clustering parameters are in the order - centers (centers) for clustering
#' with KmeansParam, centers (centers) for sub-clustering clusters with
#' KmeansParam, maximum iterations (iter.max) for clustering with KmeansParam,
#' k values (k) for clustering with NNGraphParam, and community detection method
#' (cluster.fun) to use with NNGraphParam.
#' @param outputs a character for the type of output to return to the user. "c"
#' for data frame of cell IDs and their respective cluster numbers (default), "n"
#' for list of dataframe of clusters plus neighbourhood matrix, "s" for list of
#' dataframe of clusters plus final spatialExperiment object, or "a" for list of
#' all outputs.
#' @param ... additional parameters for TwoStepParam clustering methods. Include
#' parameters like k for number of nearest neighbours and cluster.fun for
#' selecting community detection method.
#' for data frame of cell IDs and their respective cluster numbers (default)
#' and "a" for list of dataframe of clusters plus final SpatialExperiment
#' object.
#'
#' @return a list of outputs
#'
#' 1. clusters: a data frame of cell names and their cluster classification.
#'
#' 2. neighbours: a matrix of cell names and the names of their NN nearest
#' neighbour cells.
#'
#' 3. spe_final: a SpatialExperiment object with initial 'putative cell type'
#' 2. spe_final: a SpatialExperiment object with initial 'putative cell type'
#' groups, entropy values, smoothed gene expression, post-smoothing clusters,
#' and silhouette widths included.
#'
#' @importFrom SpatialExperiment spatialCoords
#' @importFrom SingleCellExperiment reducedDimNames
#' @importFrom methods show
Expand All @@ -66,8 +68,11 @@
#' @export

clustSIGNAL <- function (spe, samples, cells, dimRed = "None", batch = FALSE,
NN = 30, kernel = "G", spread = 0.05, sort = TRUE,
threads = 1, outputs = "c", ...) {
batch_by = "None", NN = 30, kernel = "G",
spread = 0.05, sort = TRUE, threads = 1, outputs = "c",
clustParams = list(clust_c = 0, subclust_c = 0,
iter.max = 30, k = 5,
cluster.fun = "louvain")) {
time_start <- Sys.time()
# data and parameter checks
if (is.null(spatialCoords(spe)) == TRUE){
Expand Down Expand Up @@ -95,22 +100,16 @@ clustSIGNAL <- function (spe, samples, cells, dimRed = "None", batch = FALSE,

# Non-spatial clustering to identify initial cluster groups
# reclust should always be FALSE here
spe <- nsClustering(spe = spe, samples = samples, dimRed = dimRed,
batch = batch, reclust = FALSE, ...)
spe <- p1_clustering(spe, dimRed, batch, batch_by, clustParams)
# Neighborhood detection, and sorting if sort = TRUE
outReg <- neighbourDetect(spe = spe, samples = samples, NN = NN,
cells = cells, sort = sort)
outReg <- neighbourDetect(spe, samples, NN, cells, sort)
# Calculating domainness of cell neighborhoods
spe <- entropyMeasure(spe = spe, cells = cells,
regXclust = outReg$regXclust, threads = threads)
spe <- entropyMeasure(spe, cells, outReg$regXclust, threads)
# Weighted smoothing guided by neighbourhood entropy
spe <- adaptiveSmoothing(spe = spe, nnCells = outReg$nnCells, NN = NN,
kernel = kernel, spread = spread,
threads = threads)
spe <- adaptiveSmoothing(spe, outReg$nnCells, NN, kernel, spread, threads)
# Non-spatial clustering of adaptively smoothed expression
# reclust should always be TRUE here
spe <- nsClustering(spe = spe, samples = samples, batch = batch,
reclust = TRUE, ...)
spe <- p2_clustering(spe, batch, batch_by, clustParams)

cluster_df <- data.frame("Cells" = spe[[cells]],
"Clusters" = spe$clustSIGNAL)
Expand All @@ -119,14 +118,8 @@ clustSIGNAL <- function (spe, samples, cells, dimRed = "None", batch = FALSE,
show(time_end - time_start)
if (outputs == "c"){
return (list("clusters" = cluster_df))
} else if (outputs == "n"){
return (list("clusters" = cluster_df,
"neighbours" = outReg$nnCells))
} else if (outputs == "s") {
return (list("clusters" = cluster_df,
"spe_final" = spe))
} else if (outputs == "a") {
return (list("clusters" = cluster_df,
"neighbours" = outReg$nnCells,
"spe_final" = spe))}
"spe_final" = spe))
}
}
110 changes: 0 additions & 110 deletions R/clustering.R

This file was deleted.

3 changes: 2 additions & 1 deletion R/entropyMeasure.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#'
#' @return SpatialExperiment object including entropy values for each cell
#' neighbourhood.
#'
#' @importFrom BiocParallel bplapply
#' @importFrom methods show
#'
Expand All @@ -25,7 +26,7 @@
#'
#' # requires list containing cluster proportions of each region (regXclust),
#' # generated using the neighbourDetect() function
#' spe <- entropyMeasure(spe, cells = "uniqueID", regXclust, threads = 1)
#' spe <- clustSIGNAL::entropyMeasure(spe, cells = "uniqueID", regXclust, threads = 1)
#' head(spe$entropy)
#'
#' @export
Expand Down
75 changes: 75 additions & 0 deletions R/finalClustering.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#' Final non-spatial clustering
#'
#' @description
#' A function to perform clustering on adaptively smoothed gene expression data.
#'
#' @param spe SpatialExperiment object containing the adaptively smoothed gene
#' expression in an assay named "smoothed".
#' @param batch a logical parameter for whether or not to perform batch
#' correction. Default value is FALSE.
#' @param batch_by a character indicating name of colData(spe) column containing
#' the batch names.
#' @param clustParams a list of parameters for TwoStepParam clustering methods.
#' The clustering parameters are in the order - centers (centers) for clustering
#' with KmeansParam, centers (centers) for sub-clustering clusters with
#' KmeansParam, maximum iterations (iter.max) for clustering with KmeansParam,
#' k values (k) for clustering with NNGraphParam, and community detection method
#' (cluster.fun) to use with NNGraphParam.
#'
#' @return SpatialExperiment object containing clusters generated from smoothed
#' data.
#'
#' @importFrom bluster clusterRows TwoStepParam KmeansParam NNGraphParam
#' @importFrom scater runPCA
#' @importFrom harmony RunHarmony
#' @importFrom SummarizedExperiment colData
#' @importFrom SingleCellExperiment reducedDim
#' @importFrom methods show
#' @importFrom stats setNames
#'
#' @examples
#' data(example)
#'
#' # For non-spatial clustering of adaptively smoothed gene expression
#' spe <- clustSIGNAL::p2_clustering(spe, batch = FALSE, batch_by = "None",
#' clustParams = list(0, 0, 30, 5, "louvain"))
#' head(spe$clustSIGNAL)
#'
#' @export

#### Non-spatial clustering
p2_clustering <- function(spe, batch, batch_by, clustParams) {
    # clustParams is read by position: [[1]] k-means centers, [[3]] k-means
    # iter.max, [[4]] graph k, [[5]] community detection method ([[2]] is not
    # used by this function). Fail early with a clear message rather than an
    # opaque "subscript out of bounds" further down.
    if (!is.list(clustParams) || length(clustParams) < 5L) {
        stop("clustParams must be a list of 5 elements in the order: ",
             "clust_c, subclust_c, iter.max, k, cluster.fun",
             call. = FALSE)
    }

    nCenters <- clustParams[[1]]
    if (nCenters == 0) {
        # 0 means "choose automatically": one centre per 5 cells, capped at
        # 5000 and floored at 1 so that inputs with fewer than 5 cells do not
        # request zero k-means centres.
        nCenters <- max(1L, min(ncol(spe) %/% 5L, 5000L))
    }

    # PCA of the adaptively smoothed expression, stored as "PCA.smooth"
    spe <- scater::runPCA(spe, assay.type = "smoothed", name = "PCA.smooth")

    if (batch == TRUE) {
        # Check the batch column(s) up front so a wrong or default batch_by
        # is reported here instead of from inside harmony.
        if (!all(batch_by %in% colnames(colData(spe)))) {
            stop("batch_by must name column(s) of colData(spe) ",
                 "when batch = TRUE",
                 call. = FALSE)
        }
        # The harmony output is used as the clustering input
        mat <- harmony::RunHarmony(data_mat = reducedDim(spe, "PCA.smooth"),
                                   meta_data = colData(spe),
                                   vars_use = batch_by, max.iter = 20,
                                   verbose = FALSE)
    } else {
        mat <- reducedDim(spe, "PCA.smooth")
    }

    # Two-step clustering: k-means first, then graph-based clustering
    stepOne <- bluster::KmeansParam(centers = nCenters,
                                    iter.max = clustParams[[3]])
    stepTwo <- bluster::NNGraphParam(k = clustParams[[4]],
                                     cluster.fun = clustParams[[5]])
    reClust <- bluster::clusterRows(
        mat,
        bluster::TwoStepParam(first = stepOne, second = stepTwo))

    # Final cluster labels are stored in the clustSIGNAL column of spe
    spe$clustSIGNAL <- factor(reClust)
    show(paste("Nonspatial clustering performed on smoothed data. Clusters =",
               length(unique(reClust)), "Time",
               format(Sys.time(),'%H:%M:%S')))
    return(spe)
}
Loading

0 comments on commit 738dfd6

Please sign in to comment.