Minor updates.

SydneyBioX · Sep 8, 2024 · 6b04c97 · 6b04c97
1 parent 37a6c03
commit 6b04c97
Show file tree

Hide file tree

Showing 18 changed files with 151 additions and 67 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,13 +2,12 @@ Package: clustSIGNAL
 Type: Package
 Title: clustSIGNAL: a spatial clustering method
 Version: 0.1.0
-Author: c(
-    person(given = "Pratibha", family = "Panwar", email = "[email protected]", role = c("cre", "aut", "ctb")),
+Authors@R: c(
+    person(given = "Pratibha", family = "Panwar", email = "[email protected]", role = c("cre", "aut", "ctb")),
     person(given = "Boyi", family = "Guo", email = "[email protected]", role = "aut"),
     person(given = "Haowen", family = "Zhao", email = "[email protected]", role = "aut"),
     person(given = "Stephanie", family = "Hicks", email = "[email protected]", role = "aut"),
-    person(given = "Shila", family = "Ghazanfar", email = "[email protected]", role = "aut", "ctb"))
-Maintainer: Pratibha Panwar <[email protected]>
+    person(given = "Shila", family = "Ghazanfar", email = "[email protected]", role = c("aut", "ctb")))
 Description: clustSIGNAL: clustering of Spatially Informed Gene expression with Neighbourhood Adapted Learning. 
     A tool for adaptively smoothing and clustering gene expression data. clustSIGNAL uses entropy to measure 
     heterogeneity of cell neighbourhoods and performs a weighted, adaptive smoothing, where homogeneous 
@@ -17,31 +16,24 @@ Description: clustSIGNAL: clustering of Spatially Informed Gene expression with
     expression data is used for clustering and could be used for other downstream analyses.
 License: GPL-2
 Encoding: UTF-8
-LazyData: true
-LazyDataCompression: xz
-URL: https://sydneybiox.github.io/clustSIGNAL/, https://sydneybiox.github.io/clustSIGNAL/
+URL: https://sydneybiox.github.io/clustSIGNAL/
 BugReports: https://github.com/sydneybiox/clustSIGNAL/issues
-biocViews: Clustering, Software 
-Roxygen: list(markdown = TRUE)
+biocViews: Clustering, Software, GeneExpression, Spatial
 RoxygenNote: 7.3.2
-Depends: 
-    R (>= 4.0.0), 
-    SpatialExperiment, 
-    doParallel
-Imports: 
+Depends: R (>= 4.4.0), SpatialExperiment
+Imports: BiocParallel,
     BiocNeighbors, 
     bluster, 
     scater,
+    SingleCellExperiment,
+    SummarizedExperiment,
+    methods
+Suggests: knitr,
     BiocStyle,
     aricode,
     cluster,
     distances,
     dplyr,
     ggplot2, 
     patchwork
-Suggests: 
-    knitr, 
-    rmarkdown,
-VignetteBuilder: 
-    knitr, 
-    rmarkdown
+VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
@@ -5,3 +5,25 @@ export(clustSIGNAL)
 export(entropyMeasure)
 export(neighbourDetect)
 export(nsClustering)
+importFrom(BiocNeighbors,KmknnParam)
+importFrom(BiocNeighbors,findKNN)
+importFrom(BiocParallel,MulticoreParam)
+importFrom(BiocParallel,SerialParam)
+importFrom(BiocParallel,SnowParam)
+importFrom(BiocParallel,bplapply)
+importFrom(BiocParallel,bpparam)
+importFrom(SingleCellExperiment,logcounts)
+importFrom(SingleCellExperiment,reducedDim)
+importFrom(SingleCellExperiment,reducedDimNames)
+importFrom(SpatialExperiment,spatialCoords)
+importFrom(SummarizedExperiment,assay)
+importFrom(bluster,KmeansParam)
+importFrom(bluster,NNGraphParam)
+importFrom(bluster,TwoStepParam)
+importFrom(bluster,clusterRows)
+importFrom(methods,as)
+importFrom(methods,show)
+importFrom(scater,runPCA)
+importFrom(stats,dexp)
+importFrom(stats,dnorm)
+importFrom(stats,setNames)
diff --git a/R/adaptiveSmoothing.R b/R/adaptiveSmoothing.R
@@ -9,6 +9,9 @@
 #' @param kernel a character for type of distribution to be used. The two valid values are "G" or "E". G for Gaussian distribution, and E for exponential distribution. Default value is "G".
 #' @param spread a numeric value for distribution spread, represented by standard deviation for Gaussian distribution and rate for exponential distribution. Default value is 0.05 for Gaussian distribution and 20 for exponential distribution.
 #' @return SpatialExperiment object including smoothed gene expression values as another assay.
+#' @importFrom SingleCellExperiment logcounts
+#' @importFrom SummarizedExperiment assay
+#' @importFrom methods show as
 #'
 #' @examples
 #' data(example)
@@ -22,37 +25,38 @@
 
 #### Smoothing
 adaptiveSmoothing <- function(spe, nnCells, NN = 30, kernel = "G", spread = 0.05) {
-    ed = unique(spe$entropy)
-    gXc = as(logcounts(spe), "sparseMatrix")
+    ed <- unique(spe$entropy)
+    gXc <- as(logcounts(spe), "sparseMatrix")
     if (kernel == "G") {
         # normal distribution weights
         # for each unique entropy value
-        weights = .gauss_kernel(ed, NN + 1, spread)
+        weights <- .gauss_kernel(ed, NN + 1, spread)
     } else if (kernel == "E") {
         # exponential distribution weights
         # for each unique entropy value
-        weights = .exp_kernel(ed, NN + 1, spread)
+        weights <- .exp_kernel(ed, NN + 1, spread)
     }
 
-    outMats = matrix(nrow = nrow(spe), ncol = 0)
-    for (val in 1:length(ed)) {
+    outMats <- matrix(nrow = nrow(spe), ncol = 0)
+    for (val in seq_len(length(ed))) {
         # cells with same entropy value will have same weights
         entCells <- colnames(spe[, spe$entropy == ed[val]])
-        e = paste0("E", ed[val])
+        e <- paste0("E", ed[val])
 
         if (length(entCells) == 1) {
             region <- as.vector(nnCells[entCells, ])
             inMat <- as.matrix(gXc[, region])
             tmpMat <- .smoothedData(inMat, weights[, e])
             colnames(tmpMat) <- entCells
         } else {
-            inMatList = list()
+            inMatList <- list()
             for (x in entCells) {
                 # names of central cell + NN cells
                 region <- as.vector(nnCells[x, ])
                 # gene expression matrix of NN cells
                 inMatList[[x]] <- as.matrix(gXc[, region])
             }
+            # BPPARAM <- .generateBPParam(cores = threads)
             out <- lapply(inMatList, .smoothedData, weight = weights[, e])
             tmpMat <- matrix(unlist(out),
                           ncol = length(out),
@@ -74,7 +78,7 @@ adaptiveSmoothing <- function(spe, nnCells, NN = 30, kernel = "G", spread = 0.05
     } else {
         # add data to spatial experiment
         assay(spe, "smoothed") <- as(smoothMat, "sparseMatrix")
-        print(paste("Smoothing performed. NN =", NN, "Kernel =", kernel, "Spread =", spread, Sys.time()))
+        show(paste("Smoothing performed. NN =", NN, "Kernel =", kernel, "Spread =", spread, Sys.time()))
     }
     return(spe)
 }

diff --git a/R/clustSIGNAL.R b/R/clustSIGNAL.R
@@ -22,6 +22,9 @@
 #' 2. neighbours: a matrix of cell names and the names of their NN nearest neighbour cells.
 #'
 #' 3. spe_final: a SpatialExperiment object with initial 'putative cell type' groups, entropy values, smoothed gene expression, post-smoothing clusters, and silhouette widths included.
+#' @importFrom SpatialExperiment spatialCoords
+#' @importFrom SingleCellExperiment reducedDimNames
+#' @importFrom methods show
 #'
 #' @examples
 #' data(example)
@@ -66,14 +69,14 @@ clustSIGNAL <- function (spe,
     }
 
     if (dimRed == "None") {
-        print("Calculating PCA to use as reduced dimension input.")
+        show("Calculating PCA to use as reduced dimension input.")
         spe <- scater::runPCA(spe)
-        dimRed = "PCA"
+        dimRed <- "PCA"
     } else if (!(dimRed %in% reducedDimNames(spe))){
         stop("The specified reduced dimension data does not exist.")
     }
 
-    print(paste("clustSIGNAL run started.", Sys.time()))
+    show(paste("clustSIGNAL run started.", Sys.time()))
 
     # Non-spatial clustering to identify 'putative celltype' groups
     # reclust should always be FALSE here
@@ -98,7 +101,7 @@ clustSIGNAL <- function (spe,
 
     cluster_df <- data.frame("Cells" = spe[[cells]], "Clusters" = spe$reCluster)
 
-    print(paste("clustSIGNAL run completed.", Sys.time()))
+    show(paste("clustSIGNAL run completed.", Sys.time()))
 
     if (outputs == "c"){
         return (list("clusters" = cluster_df))

diff --git a/R/clustering.R b/R/clustering.R
@@ -9,6 +9,11 @@
 #' @param ... additional parameters for TwoStepParam clustering methods. Include parameters like k for number of nearest neighbours and cluster.fun for selecting community detection method. Default values k = 5, cluster.fun = "louvain".
 #'
 #' @return SpatialExperiment object containing 'putative cell type' group allotted to each cell (reclust = FALSE) or clusters generated from smoothed data (reclust = TRUE).
+#' @importFrom bluster clusterRows TwoStepParam KmeansParam NNGraphParam
+#' @importFrom scater runPCA
+#' @importFrom SingleCellExperiment reducedDim
+#' @importFrom methods show
+#' @importFrom stats setNames
 #'
 #' @examples
 #' data(example)
@@ -33,11 +38,11 @@ nsClustering <- function(spe, dimRed = "PCA", reclust, ...) {
                                             first = bluster::KmeansParam(centers = clustVal, iter.max = 30),
                                             second = bluster::NNGraphParam(k = 5, cluster.fun = "louvain")))
         spe$nsCluster <- factor(nsClust)
-        print(paste("Initial nonspatial clustering performed. Clusters =", length(unique(nsClust)), Sys.time()))
+        show(paste("Initial nonspatial clustering performed. Clusters =", length(unique(nsClust)), Sys.time()))
         # Initial subclustering
         clusters <- length(unique(spe$nsCluster))
         subclusters_list = list()
-        for (c in 1:clusters){
+        for (c in seq_len(clusters)){
             speX <- spe[, spe$nsCluster == c]
             speX <- scater::runPCA(speX)
             matX <- reducedDim(speX, "PCA")
@@ -51,7 +56,7 @@ nsClustering <- function(spe, dimRed = "PCA", reclust, ...) {
             subclusters_list[[c]] <- setNames(paste0(c, ".", nsClustX), colnames(speX))
         }
         spe$nsSubcluster <- factor(unlist(subclusters_list)[colnames(spe)])
-        print(paste("Nonspatial subclustering performed. Subclusters =", length(unique(spe$nsSubcluster)), Sys.time()))
+        show(paste("Nonspatial subclustering performed. Subclusters =", length(unique(spe$nsSubcluster)), Sys.time()))
 
     } else if (reclust == TRUE) {
         # Clustering adaptively smoothed data
@@ -64,7 +69,7 @@ nsClustering <- function(spe, dimRed = "PCA", reclust, ...) {
                                             first = bluster::KmeansParam(centers = clustVal, iter.max = 30),
                                             second = bluster::NNGraphParam(k = 5, cluster.fun = "louvain")))
         spe$reCluster <- factor(reClust)
-        print(paste("Nonspatial clustering performed on smoothed data. Clusters =", length(unique(reClust)), Sys.time()))
+        show(paste("Nonspatial clustering performed on smoothed data. Clusters =", length(unique(reClust)), Sys.time()))
     }
     return (spe)
 }
diff --git a/R/data.R b/R/data.R
@@ -44,6 +44,9 @@ NULL
 #' \code{regXclust} a list where each element corresponds to a cell in spe object,
 #' and contains the cluster composition proportions.
 #' @usage data(example)
+#' @source Integration of spatial and single-cell transcriptomic data elucidates mouse
+#' organogenesis, \emph{Nature Biotechnology}, 2022.
+#' Webpage: \url{https://www.nature.com/articles/s41587-021-01006-2}
 #' @keywords datasets
 NULL
 

diff --git a/R/expKernel.R b/R/expKernel.R
@@ -6,18 +6,16 @@
 #' @param ed a numeric value for entropy of the neighbourhood.
 #' @param NN an integer for the number of neighbourhood cells the function should consider. The value must be greater than or equal to 1. Default value is 30.
 #' @param rate a numeric value for rate of exponential distribution.
+#' @importFrom stats dexp
 #'
-#' @return a numeric vector of weights.
-#'
-#' @keywords internal
+#' @return a matrix where each column contains weights related to specific entropy values.
 
-# function to retrieve exponential distribution weights for neighbors
 .exp_kernel <- function(ed, NN, rate) {
     # distribution from 0 to entropy, with cells in smoothing radius as cut points
 
     # cutpoints <- seq(0, ed, length.out = NN)
     # return(as.matrix(dexp(cutpoints, rate = rate)))
-    weights = dexp(sapply(ed, function(x) seq(0, x, length.out = NN)), rate = rate)
+    weights <- dexp(sapply(ed, function(x) seq(0, x, length.out = NN)), rate = rate)
     colnames(weights) <- paste0("E", ed)
     return (weights)
 }
diff --git a/R/gaussKernel.R b/R/gaussKernel.R
@@ -6,18 +6,15 @@
 #' @param ed a numeric value for entropy of the neighbourhood.
 #' @param NN an integer for the number of neighbourhood cells the function should consider. The value must be greater than or equal to 1. Default value is 30.
 #' @param sd a numeric value for standard deviation of Gaussian distribution.
-#'
-#' @return a numeric vector of weights.
-#'
-#' @keywords internal
+#' @importFrom stats dnorm
+#' @return a matrix where each column contains weights related to specific entropy values.
 
-# function to retrieve normal distribution weights for neighbors
 .gauss_kernel <- function(ed, NN, sd) {
     # distribution from 0 to entropy, with cells in smoothing radius as cut points
 
     # cutpoints <- seq(0, ed, length.out = NN)
     # return(as.matrix(dnorm(cutpoints, sd = sd)))
-    weights = dnorm(sapply(ed, function(x) seq(0, x, length.out = NN)), sd = sd)
+    weights <- dnorm(sapply(ed, function(x) seq(0, x, length.out = NN)), sd = sd)
     colnames(weights) <- paste0("E", ed)
     return (weights)
 }
diff --git a/R/neighborDetect.R b/R/neighborDetect.R
@@ -14,7 +14,10 @@
 #' 1. nnCells, a character matrix of NN nearest neighbours - rows are cells and columns are their nearest neighbours ranged from closest to farthest neighbour. For sort = TRUE, the neighbours belonging to the same 'putative cell type' group as the cell are moved closer to it.
 #'
 #' 2. regXclust, a list of vectors for each cell's neighbourhood composition indicated by the proportion of 'putative cell type' groups it contains.
-#
+#' @importFrom BiocNeighbors findKNN KmknnParam
+#' @importFrom SpatialExperiment spatialCoords
+#' @importFrom methods show
+#'
 #' @examples
 #' data(example)
 #'
@@ -41,7 +44,7 @@ neighbourDetect <- function(spe, samples, NN = 30, cells, sort = TRUE) {
         nnMatlist <- BiocNeighbors::findKNN(xy_pos, k = NN, BNPARAM = BiocNeighbors::KmknnParam())
         rownames(nnMatlist$index) <- rownames(subClust)
         # adding central cell indices as first column of matrix of NN neighborhood indices
-        nnMatlist$indexNew <- cbind(seq(1:nrow(nnMatlist$index)), nnMatlist$index)
+        nnMatlist$indexNew <- cbind(seq_len(nrow(nnMatlist$index)), nnMatlist$index)
         if (sort == TRUE) {
             nnCells <- rbind(nnCells, t(apply(nnMatlist$indexNew, 1, .cellNameSort, Clust = Clust)))
         } else if (sort == FALSE) {
@@ -73,7 +76,7 @@ neighbourDetect <- function(spe, samples, NN = 30, cells, sort = TRUE) {
     } else if (check.cells.NA != 0 | check.clusts.NA != 0) {
         stop("Missing values in neighbor data.")
     } else {
-        print(paste("Regions defined.", Sys.time()))
+        show(paste("Regions defined.", Sys.time()))
     }
     return(list("nnCells" = nnCells,
                 "regXclust" = regXclust))

diff --git a/R/neighbourComposition.R b/R/neighbourComposition.R
@@ -11,7 +11,7 @@
 
 # function to retrieve cluster proportions of each region
 .calculateProp <- function(arr) {
-    prop = prop.table(table(arr))
+    prop <- prop.table(table(arr))
     if (round(sum(prop)) != 1){ # check if the proportions for each region sum up to 1
         stop(paste("Proportions are incorrect.", "row =", c, "proportion =", sum(arr)))
     }

diff --git a/R/neighbourSort.R b/R/neighbourSort.R
@@ -15,11 +15,11 @@
     cellDF <- data.frame(cellName = rownames(Clust)[cell], cluster = Clust[cell, 1])
     if (length(unique(Clust[cell,])) == 1) { # all cells belong to same putative cell type
         return(rownames(Clust)[cell])
-    } else if (cellDF$cluster[1] %in% cellDF$cluster[2:length(cell)]) {
+    } else if (cellDF$cluster[1] %in% cellDF$cluster[seq(2, length(cell))]) {
         centralClust <- cellDF$cluster[1]
         sameClust <- c()
         diffClust <- c()
-        for (c in 1:nrow(cellDF)) {
+        for (c in seq_len(nrow(cellDF))) {
             if (cellDF$cluster[c] == centralClust){
                 sameClust <- append(sameClust, cellDF$cellName[c])
             } else {

diff --git a/R/utilities.R b/R/utilities.R
@@ -0,0 +1,36 @@
+#' Exponential distribution weights
+#'
+#' @description
+#' A utility function to generate BPPARAM object.
+#'
+#' @param cores Desired number of cores for BPPARAM object.
+#' @return A BPPPARAM object.
+#' @importFrom BiocParallel SerialParam SnowParam MulticoreParam bpparam
+
+.generateBPParam <- function(cores = 1) {
+    seed <- .Random.seed[1]
+    if (cores == 1) {
+        BPparam <- BiocParallel::SerialParam(RNGseed = seed)
+    } else { ## Parallel processing is desired.
+        ## Also set the BPparam RNGseed if the user ran set.seed(someNumber) themselves.
+        if (Sys.info()["sysname"] == "Windows") { # Only SnowParam suits Windows.
+            BPparam <- BiocParallel::SnowParam(min(
+                cores,
+                BiocParallel::snowWorkers("SOCK")
+            ),
+            RNGseed = seed
+            )
+        } else if (Sys.info()["sysname"] %in% c("MacOS", "Linux")) {
+            BPparam <- BiocParallel::MulticoreParam(min(
+                cores,
+                BiocParallel::multicoreWorkers()
+            ),
+            RNGseed = seed
+            )
+            ## Multicore is faster than SNOW, but it doesn't work on Windows.
+        } else { ## Something weird.
+            BPparam <- BiocParallel::bpparam() ## BiocParallel will figure it out.
+        }
+    }
+    BPparam
+}
diff --git a/man/clustSIGNAL.Rd b/man/clustSIGNAL.Rd