Skip to content

Commit

Permalink
Minor updates.
Browse files Browse the repository at this point in the history
  • Loading branch information
PratibhaPanwar committed Sep 8, 2024
1 parent 37a6c03 commit 6b04c97
Show file tree
Hide file tree
Showing 18 changed files with 151 additions and 67 deletions.
32 changes: 12 additions & 20 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@ Package: clustSIGNAL
Type: Package
Title: clustSIGNAL: a spatial clustering method
Version: 0.1.0
Author: c(
person(given = "Pratibha", family = "Panwar", email = "[email protected]", role = c("cre", "aut", "ctb")),
Authors@R: c(
person(given = "Pratibha", family = "Panwar", email = "[email protected]", role = c("cre", "aut", "ctb")),
person(given = "Boyi", family = "Guo", email = "[email protected]", role = "aut"),
person(given = "Haowen", family = "Zhao", email = "[email protected]", role = "aut"),
person(given = "Stephanie", family = "Hicks", email = "[email protected]", role = "aut"),
person(given = "Shila", family = "Ghazanfar", email = "[email protected]", role = "aut", "ctb"))
Maintainer: Pratibha Panwar <[email protected]>
person(given = "Shila", family = "Ghazanfar", email = "[email protected]", role = c("aut", "ctb")))
Description: clustSIGNAL: clustering of Spatially Informed Gene expression with Neighbourhood Adapted Learning.
A tool for adaptively smoothing and clustering gene expression data. clustSIGNAL uses entropy to measure
heterogeneity of cell neighbourhoods and performs a weighted, adaptive smoothing, where homogeneous
Expand All @@ -17,31 +16,24 @@ Description: clustSIGNAL: clustering of Spatially Informed Gene expression with
expression data is used for clustering and could be used for other downstream analyses.
License: GPL-2
Encoding: UTF-8
LazyData: true
LazyDataCompression: xz
URL: https://sydneybiox.github.io/clustSIGNAL/, https://sydneybiox.github.io/clustSIGNAL/
URL: https://sydneybiox.github.io/clustSIGNAL/
BugReports: https://github.com/sydneybiox/clustSIGNAL/issues
biocViews: Clustering, Software
Roxygen: list(markdown = TRUE)
biocViews: Clustering, Software, GeneExpression, Spatial
RoxygenNote: 7.3.2
Depends:
R (>= 4.0.0),
SpatialExperiment,
doParallel
Imports:
Depends: R (>= 4.4.0), SpatialExperiment
Imports: BiocParallel,
BiocNeighbors,
bluster,
scater,
SingleCellExperiment,
SummarizedExperiment,
methods
Suggests: knitr,
BiocStyle,
aricode,
cluster,
distances,
dplyr,
ggplot2,
patchwork
Suggests:
knitr,
rmarkdown,
VignetteBuilder:
knitr,
rmarkdown
VignetteBuilder: knitr
22 changes: 22 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,25 @@ export(clustSIGNAL)
export(entropyMeasure)
export(neighbourDetect)
export(nsClustering)
importFrom(BiocNeighbors,KmknnParam)
importFrom(BiocNeighbors,findKNN)
importFrom(BiocParallel,MulticoreParam)
importFrom(BiocParallel,SerialParam)
importFrom(BiocParallel,SnowParam)
importFrom(BiocParallel,bplapply)
importFrom(BiocParallel,bpparam)
importFrom(SingleCellExperiment,logcounts)
importFrom(SingleCellExperiment,reducedDim)
importFrom(SingleCellExperiment,reducedDimNames)
importFrom(SpatialExperiment,spatialCoords)
importFrom(SummarizedExperiment,assay)
importFrom(bluster,KmeansParam)
importFrom(bluster,NNGraphParam)
importFrom(bluster,TwoStepParam)
importFrom(bluster,clusterRows)
importFrom(methods,as)
importFrom(methods,show)
importFrom(scater,runPCA)
importFrom(stats,dexp)
importFrom(stats,dnorm)
importFrom(stats,setNames)
22 changes: 13 additions & 9 deletions R/adaptiveSmoothing.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#' @param kernel a character for type of distribution to be used. The two valid values are "G" or "E". G for Gaussian distribution, and E for exponential distribution. Default value is "G".
#' @param spread a numeric value for distribution spread, represented by standard deviation for Gaussian distribution and rate for exponential distribution. Default value is 0.05 for Gaussian distribution and 20 for exponential distribution.
#' @return SpatialExperiment object including smoothed gene expression values as another assay.
#' @importFrom SingleCellExperiment logcounts
#' @importFrom SummarizedExperiment assay
#' @importFrom methods show as
#'
#' @examples
#' data(example)
Expand All @@ -22,37 +25,38 @@

#### Smoothing
adaptiveSmoothing <- function(spe, nnCells, NN = 30, kernel = "G", spread = 0.05) {
ed = unique(spe$entropy)
gXc = as(logcounts(spe), "sparseMatrix")
ed <- unique(spe$entropy)
gXc <- as(logcounts(spe), "sparseMatrix")
if (kernel == "G") {
# normal distribution weights
# for each unique entropy value
weights = .gauss_kernel(ed, NN + 1, spread)
weights <- .gauss_kernel(ed, NN + 1, spread)
} else if (kernel == "E") {
# exponential distribution weights
# for each unique entropy value
weights = .exp_kernel(ed, NN + 1, spread)
weights <- .exp_kernel(ed, NN + 1, spread)
}

outMats = matrix(nrow = nrow(spe), ncol = 0)
for (val in 1:length(ed)) {
outMats <- matrix(nrow = nrow(spe), ncol = 0)
for (val in seq_len(length(ed))) {
# cells with same entropy value will have same weights
entCells <- colnames(spe[, spe$entropy == ed[val]])
e = paste0("E", ed[val])
e <- paste0("E", ed[val])

if (length(entCells) == 1) {
region <- as.vector(nnCells[entCells, ])
inMat <- as.matrix(gXc[, region])
tmpMat <- .smoothedData(inMat, weights[, e])
colnames(tmpMat) <- entCells
} else {
inMatList = list()
inMatList <- list()
for (x in entCells) {
# names of central cell + NN cells
region <- as.vector(nnCells[x, ])
# gene expression matrix of NN cells
inMatList[[x]] <- as.matrix(gXc[, region])
}
# BPPARAM <- .generateBPParam(cores = threads)
out <- lapply(inMatList, .smoothedData, weight = weights[, e])
tmpMat <- matrix(unlist(out),
ncol = length(out),
Expand All @@ -74,7 +78,7 @@ adaptiveSmoothing <- function(spe, nnCells, NN = 30, kernel = "G", spread = 0.05
} else {
# add data to spatial experiment
assay(spe, "smoothed") <- as(smoothMat, "sparseMatrix")
print(paste("Smoothing performed. NN =", NN, "Kernel =", kernel, "Spread =", spread, Sys.time()))
show(paste("Smoothing performed. NN =", NN, "Kernel =", kernel, "Spread =", spread, Sys.time()))
}
return(spe)
}
Expand Down
11 changes: 7 additions & 4 deletions R/clustSIGNAL.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
#' 2. neighbours: a matrix of cell names and the names of their NN nearest neighbour cells.
#'
#' 3. spe_final: a SpatialExperiment object with initial 'putative cell type' groups, entropy values, smoothed gene expression, post-smoothing clusters, and silhouette widths included.
#' @importFrom SpatialExperiment spatialCoords
#' @importFrom SingleCellExperiment reducedDimNames
#' @importFrom methods show
#'
#' @examples
#' data(example)
Expand Down Expand Up @@ -66,14 +69,14 @@ clustSIGNAL <- function (spe,
}

if (dimRed == "None") {
print("Calculating PCA to use as reduced dimension input.")
show("Calculating PCA to use as reduced dimension input.")
spe <- scater::runPCA(spe)
dimRed = "PCA"
dimRed <- "PCA"
} else if (!(dimRed %in% reducedDimNames(spe))){
stop("The specified reduced dimension data does not exist.")
}

print(paste("clustSIGNAL run started.", Sys.time()))
show(paste("clustSIGNAL run started.", Sys.time()))

# Non-spatial clustering to identify 'putative celltype' groups
# reclust should always be FALSE here
Expand All @@ -98,7 +101,7 @@ clustSIGNAL <- function (spe,

cluster_df <- data.frame("Cells" = spe[[cells]], "Clusters" = spe$reCluster)

print(paste("clustSIGNAL run completed.", Sys.time()))
show(paste("clustSIGNAL run completed.", Sys.time()))

if (outputs == "c"){
return (list("clusters" = cluster_df))
Expand Down
13 changes: 9 additions & 4 deletions R/clustering.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
#' @param ... additional parameters for TwoStepParam clustering methods. Include parameters like k for number of nearest neighbours and cluster.fun for selecting community detection method. Default values k = 5, cluster.fun = "louvain".
#'
#' @return SpatialExperiment object containing 'putative cell type' group allotted to each cell (reclust = FALSE) or clusters generated from smoothed data (reclust = TRUE).
#' @importFrom bluster clusterRows TwoStepParam KmeansParam NNGraphParam
#' @importFrom scater runPCA
#' @importFrom SingleCellExperiment reducedDim
#' @importFrom methods show
#' @importFrom stats setNames
#'
#' @examples
#' data(example)
Expand All @@ -33,11 +38,11 @@ nsClustering <- function(spe, dimRed = "PCA", reclust, ...) {
first = bluster::KmeansParam(centers = clustVal, iter.max = 30),
second = bluster::NNGraphParam(k = 5, cluster.fun = "louvain")))
spe$nsCluster <- factor(nsClust)
print(paste("Initial nonspatial clustering performed. Clusters =", length(unique(nsClust)), Sys.time()))
show(paste("Initial nonspatial clustering performed. Clusters =", length(unique(nsClust)), Sys.time()))
# Initial subclustering
clusters <- length(unique(spe$nsCluster))
subclusters_list = list()
for (c in 1:clusters){
for (c in seq_len(clusters)){
speX <- spe[, spe$nsCluster == c]
speX <- scater::runPCA(speX)
matX <- reducedDim(speX, "PCA")
Expand All @@ -51,7 +56,7 @@ nsClustering <- function(spe, dimRed = "PCA", reclust, ...) {
subclusters_list[[c]] <- setNames(paste0(c, ".", nsClustX), colnames(speX))
}
spe$nsSubcluster <- factor(unlist(subclusters_list)[colnames(spe)])
print(paste("Nonspatial subclustering performed. Subclusters =", length(unique(spe$nsSubcluster)), Sys.time()))
show(paste("Nonspatial subclustering performed. Subclusters =", length(unique(spe$nsSubcluster)), Sys.time()))

} else if (reclust == TRUE) {
# Clustering adaptively smoothed data
Expand All @@ -64,7 +69,7 @@ nsClustering <- function(spe, dimRed = "PCA", reclust, ...) {
first = bluster::KmeansParam(centers = clustVal, iter.max = 30),
second = bluster::NNGraphParam(k = 5, cluster.fun = "louvain")))
spe$reCluster <- factor(reClust)
print(paste("Nonspatial clustering performed on smoothed data. Clusters =", length(unique(reClust)), Sys.time()))
show(paste("Nonspatial clustering performed on smoothed data. Clusters =", length(unique(reClust)), Sys.time()))
}
return (spe)
}
3 changes: 3 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ NULL
#' \code{regXclust} a list where each element corresponds to a cell in spe object,
#' and contains the cluster composition proportions.
#' @usage data(example)
#' @source Integration of spatial and single-cell transcriptomic data elucidates mouse
#' organogenesis, \emph{Nature Biotechnology}, 2022.
#' Webpage: \url{https://www.nature.com/articles/s41587-021-01006-2}
#' @keywords datasets
NULL

Expand Down
8 changes: 3 additions & 5 deletions R/expKernel.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,16 @@
#' @param ed a numeric value for entropy of the neighbourhood.
#' @param NN an integer for the number of neighbourhood cells the function should consider. The value must be greater than or equal to 1. Default value is 30.
#' @param rate a numeric value for rate of exponential distribution.
#' @importFrom stats dexp
#'
#' @return a numeric vector of weights.
#'
#' @keywords internal
#' @return a matrix where each column contains weights related to specific entropy values.

# function to retrieve exponential distribution weights for neighbors
.exp_kernel <- function(ed, NN, rate) {
# distribution from 0 to entropy, with cells in smoothing radius as cut points

# cutpoints <- seq(0, ed, length.out = NN)
# return(as.matrix(dexp(cutpoints, rate = rate)))
weights = dexp(sapply(ed, function(x) seq(0, x, length.out = NN)), rate = rate)
weights <- dexp(sapply(ed, function(x) seq(0, x, length.out = NN)), rate = rate)
colnames(weights) <- paste0("E", ed)
return (weights)
}
9 changes: 3 additions & 6 deletions R/gaussKernel.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,15 @@
#' @param ed a numeric value for entropy of the neighbourhood.
#' @param NN an integer for the number of neighbourhood cells the function should consider. The value must be greater than or equal to 1. Default value is 30.
#' @param sd a numeric value for standard deviation of Gaussian distribution.
#'
#' @return a numeric vector of weights.
#'
#' @keywords internal
#' @importFrom stats dnorm
#' @return a matrix where each column contains weights related to specific entropy values.

# function to retrieve normal distribution weights for neighbors
.gauss_kernel <- function(ed, NN, sd) {
# distribution from 0 to entropy, with cells in smoothing radius as cut points

# cutpoints <- seq(0, ed, length.out = NN)
# return(as.matrix(dnorm(cutpoints, sd = sd)))
weights = dnorm(sapply(ed, function(x) seq(0, x, length.out = NN)), sd = sd)
weights <- dnorm(sapply(ed, function(x) seq(0, x, length.out = NN)), sd = sd)
colnames(weights) <- paste0("E", ed)
return (weights)
}
9 changes: 6 additions & 3 deletions R/neighborDetect.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
#' 1. nnCells, a character matrix of NN nearest neighbours - rows are cells and columns are their nearest neighbours ranged from closest to farthest neighbour. For sort = TRUE, the neighbours belonging to the same 'putative cell type' group as the cell are moved closer to it.
#'
#' 2. regXclust, a list of vectors for each cell's neighbourhood composition indicated by the proportion of 'putative cell type' groups it contains.
#
#' @importFrom BiocNeighbors findKNN KmknnParam
#' @importFrom SpatialExperiment spatialCoords
#' @importFrom methods show
#'
#' @examples
#' data(example)
#'
Expand All @@ -41,7 +44,7 @@ neighbourDetect <- function(spe, samples, NN = 30, cells, sort = TRUE) {
nnMatlist <- BiocNeighbors::findKNN(xy_pos, k = NN, BNPARAM = BiocNeighbors::KmknnParam())
rownames(nnMatlist$index) <- rownames(subClust)
# adding central cell indices as first column of matrix of NN neighborhood indices
nnMatlist$indexNew <- cbind(seq(1:nrow(nnMatlist$index)), nnMatlist$index)
nnMatlist$indexNew <- cbind(seq_len(nrow(nnMatlist$index)), nnMatlist$index)
if (sort == TRUE) {
nnCells <- rbind(nnCells, t(apply(nnMatlist$indexNew, 1, .cellNameSort, Clust = Clust)))
} else if (sort == FALSE) {
Expand Down Expand Up @@ -73,7 +76,7 @@ neighbourDetect <- function(spe, samples, NN = 30, cells, sort = TRUE) {
} else if (check.cells.NA != 0 | check.clusts.NA != 0) {
stop("Missing values in neighbor data.")
} else {
print(paste("Regions defined.", Sys.time()))
show(paste("Regions defined.", Sys.time()))
}
return(list("nnCells" = nnCells,
"regXclust" = regXclust))
Expand Down
2 changes: 1 addition & 1 deletion R/neighbourComposition.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

# function to retrieve cluster proportions of each region
.calculateProp <- function(arr) {
prop = prop.table(table(arr))
prop <- prop.table(table(arr))
if (round(sum(prop)) != 1){ # check if the proportions for each region sum up to 1
stop(paste("Proportions are incorrect.", "row =", c, "proportion =", sum(arr)))
}
Expand Down
4 changes: 2 additions & 2 deletions R/neighbourSort.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
cellDF <- data.frame(cellName = rownames(Clust)[cell], cluster = Clust[cell, 1])
if (length(unique(Clust[cell,])) == 1) { # all cells belong to same putative cell type
return(rownames(Clust)[cell])
} else if (cellDF$cluster[1] %in% cellDF$cluster[2:length(cell)]) {
} else if (cellDF$cluster[1] %in% cellDF$cluster[seq(2, length(cell))]) {
centralClust <- cellDF$cluster[1]
sameClust <- c()
diffClust <- c()
for (c in 1:nrow(cellDF)) {
for (c in seq_len(nrow(cellDF))) {
if (cellDF$cluster[c] == centralClust){
sameClust <- append(sameClust, cellDF$cellName[c])
} else {
Expand Down
36 changes: 36 additions & 0 deletions R/utilities.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#' Exponential distribution weights
#'
#' @description
#' A utility function to generate BPPARAM object.
#'
#' @param cores Desired number of cores for BPPARAM object.
#' @return A BPPPARAM object.
#' @importFrom BiocParallel SerialParam SnowParam MulticoreParam bpparam

.generateBPParam <- function(cores = 1) {
seed <- .Random.seed[1]
if (cores == 1) {
BPparam <- BiocParallel::SerialParam(RNGseed = seed)
} else { ## Parallel processing is desired.
## Also set the BPparam RNGseed if the user ran set.seed(someNumber) themselves.
if (Sys.info()["sysname"] == "Windows") { # Only SnowParam suits Windows.
BPparam <- BiocParallel::SnowParam(min(
cores,
BiocParallel::snowWorkers("SOCK")
),
RNGseed = seed
)
} else if (Sys.info()["sysname"] %in% c("MacOS", "Linux")) {
BPparam <- BiocParallel::MulticoreParam(min(
cores,
BiocParallel::multicoreWorkers()
),
RNGseed = seed
)
## Multicore is faster than SNOW, but it doesn't work on Windows.
} else { ## Something weird.
BPparam <- BiocParallel::bpparam() ## BiocParallel will figure it out.
}
}
BPparam
}
11 changes: 6 additions & 5 deletions man/clustSIGNAL.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 6b04c97

Please sign in to comment.