Skip to content

Commit

Permalink
add interop
Browse files Browse the repository at this point in the history
  • Loading branch information
shazanfar committed Sep 27, 2024
1 parent 4ca82b2 commit a737ef5
Show file tree
Hide file tree
Showing 16 changed files with 502 additions and 71 deletions.
6 changes: 5 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,11 @@ Imports: Rfast,
doBy,
BiocParallel,
rlang,
methods
methods,
BiocGenerics,
SummarizedExperiment,
SingleCellExperiment,
SpatialExperiment
Suggests: knitr,
BiocStyle,
class
Expand Down
11 changes: 11 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
# Generated by roxygen2: do not edit by hand

export(calculatewSIR)
export(exploreWSIRParams)
export(findTopGenes)
export(generateUmapFromWSIR)
export(plotUmapFromWSIR)
export(projectWSIR)
export(runwSIR)
export(visualiseWSIRDirections)
export(wSIR)
importFrom(BiocGenerics,t)
importFrom(BiocParallel,MulticoreParam)
importFrom(BiocParallel,SerialParam)
importFrom(BiocParallel,bplapply)
importFrom(Rcpp,evalCpp)
importFrom(Rfast,dcor)
importFrom(SingleCellExperiment,reducedDim)
importFrom(SingleCellExperiment,reducedDims)
importFrom(SpatialExperiment,spatialCoords)
importFrom(SummarizedExperiment,assay)
importFrom(SummarizedExperiment,assays)
importFrom(SummarizedExperiment,colData)
importFrom(distances,distances)
importFrom(doBy,which.maxn)
importFrom(ggplot2,aes)
Expand All @@ -25,6 +35,7 @@ importFrom(ggplot2,scale_color_gradient)
importFrom(ggplot2,theme_classic)
importFrom(ggplot2,theme_minimal)
importFrom(magrittr,"%>%")
importFrom(methods,as)
importFrom(methods,is)
importFrom(rlang,.data)
importFrom(stats,cor)
Expand Down
99 changes: 99 additions & 0 deletions R/calculatewSIR.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#' calculatewSIR
#'
#' @description
#' Perform wSIR on cells, based on the expression data and a reducedDim in a
#' SingleCellExperiment or SpatialExperiment object
#'
#' @param x A numeric matrix of normalised gene expression data where rows are
#' features and columns are cells. Alternatively, a SingleCellExperiment or
#' SpatialExperiment containing such a matrix
#' @param assay.type if `x` is a SingleCellExperiment of SpatialExperiment then
#' this is the assay for which wSIR will be calculated. Default "logcounts".
#' @param dimred String or integer scalar specifying the dimensionality reduction
#' slot for which to use for the slicing mechanism. Ignored if `coords` given.
#' @param colData_columns character vector specifying the subset of colData
#' columns to be used for the wSIR slicing mechanism. Ignored if `coords` or
#' `dimred` given
#' @param spatialCoords logical indicating if spatialCoords should be used for
#' the wSIR slicing mechanism. Ignored if `coords`, `dimred`, or
#' `colData_columns` given, or if `x` is not a SpatialExperiment object.
#' @param ... arguments passing to `wSIR`
#'
#' @return A wSIR object
#'
#' @importFrom SummarizedExperiment assay assays colData
#' @importFrom SingleCellExperiment reducedDim reducedDims
#' @importFrom SpatialExperiment spatialCoords
#' @importFrom methods is as
#' @importFrom BiocGenerics t
#'
#' @examples
#' data(MouseData)
#' library(SingleCellExperiment)
#' sce = SingleCellExperiment(assays = list(logcounts = t(sample1_exprs)),
#' reducedDims = list(spatial = sample1_coords))
#'
#' obj = calculatewSIR(x = sce,
#' dimred = "spatial")
#'
#' @export
calculatewSIR <- function(x,
assay.type = "logcounts",
dimred = NULL,
colData_columns = NULL,
spatialCoords = FALSE,
...) {

isMatLike <- methods::is(x, "matrix")

if (isMatLike) {

wsir_obj <- wSIR(X = x, ...)

return(wsir_obj)

}

if (!assay.type %in% names(SummarizedExperiment::assays(x))) {
stop("assay.type not within assays of x")
}

X <- SummarizedExperiment::assay(x, assay.type)

if (!is.null(dimred)) {

if (!dimred %in% names(SingleCellExperiment::reducedDims(x))) {
stop("dimred not within reducedDims of x")
}

coords <- SingleCellExperiment::reducedDim(x, dimred)

} else {

if (!is.null(colData_columns)) {

if (!all(colData_columns %in% colnames(SummarizedExperiment::colData(x)))) {
stop("not all colData_columns names are in colnames(colData(x))")
}

coords <- SummarizedExperiment::colData(x)[,colData_columns, drop = FALSE]
coords <- methods::as(coords, "data.frame")

} else {

if (!is.null(spatialCoords)) {

coords <- SpatialExperiment::spatialCoords(x)
coords <- base::as.data.frame(coords)
colnames(coords)[seq_len(2)] <- c("x", "y")

}

}

}

wsir_obj <- wSIR(X = BiocGenerics::t(X), coords = coords, ...)

return(wsir_obj)
}
167 changes: 132 additions & 35 deletions R/exploreWSIRParams.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,12 @@
#' embedding. Default is "DC".
#' @param nrep integer for the number of train/test splits of the data to
#' perform.
#' @param param parallel computing setup for bplapply from BiocParallel
#' @param BPPARAM parallel computing setup for bplapply from BiocParallel
#' package. Default is to use a single core, hence
#' default value is MulticoreParam(workers = 1)
#' default value is SerialParam()
#' @param plot logical whether a dotplot of parameters and metrics should be
#' produced, default TRUE
#' @param verbose default TRUE
#' @param ... arguments passed on to wSIROptimisation
#'
#' @return List with five slots, named "plot", "message", "best_alpha",
Expand Down Expand Up @@ -87,7 +90,7 @@
#' @importFrom magrittr %>%
#' @importFrom ggplot2 ggplot aes geom_point theme_classic ggtitle
#' @importFrom vctrs vec_rep_each
#' @importFrom BiocParallel bplapply MulticoreParam
#' @importFrom BiocParallel SerialParam SnowParam MulticoreParam bpparam bplapply
#' @importFrom stringr word
#'
#' @export
Expand All @@ -98,14 +101,21 @@ exploreWSIRParams <- function(X,
optim_slices = c(5,10,15,20),
metric = "DC",
nrep = 5,
param = MulticoreParam(workers = 1),
# BPPARAM = SerialParam(),
nCores = 1,
plot = TRUE,
verbose = TRUE,
...
) {

# vector of all parameter combinations
param_combinations <- expand.grid(slices = optim_slices, alpha = optim_alpha,
metric = NA)
BPPARAM <- .generateBPParam(cores = nCores)

# vector of all parameter combinations
param_combinations <- expand.grid(slices = optim_slices,
alpha = optim_alpha,
rep = seq_len(nrep)
# metric = NA)
)

# # perform bplapply over that list of (pairs of) combinations
# metric_vals_list = bplapply(param_combinations, function(parameter_pair) {
Expand All @@ -130,52 +140,139 @@ exploreWSIRParams <- function(X,
nrow = nrow(X), ncol = nrep
)
# create training and test set from each column index
split_list <- apply(index_rep, 2, function(keep){
split_list <- apply(index_rep, 2, function(keep) {
X_train <- X[keep,]
coords_train <- coords[keep,]
samples_train <- samples[keep]
X_test <- X[!keep,]
coords_test <- coords[!keep,]
list(X_train, coords_train, X_test,
coords_test, samples_train )
list(X_train,
coords_train,
X_test,
coords_test,
samples_train)
})
nElements <- 5
# nElements <- 5 # not sure why this is 5
nElements <- length(split_list[[1]]) # maybe it is this?
result <- lapply(seq_len(nElements),
function(i) lapply(split_list, "[[", i))
# the above is like a list version of transpose

for (ii in seq_len(nrow(param_combinations))) {
cv_scores <- mapply(function(X_train, coords_train, X_test,
coords_test, samples_train){
wSIROptimisation(as.matrix(X_train),
coords_train, as.matrix(X_test),
coords_test,
samples_train, param_combinations$slices[ii],
param_combinations$alpha[ii],
evalmetrics = metric,
...)
}, result[[1]], result[[2]], result[[3]], result[[4]], result[[5]])

param_combinations$metric[ii] <- mean(cv_scores, na.rm = TRUE)
}
if (verbose) message("set up nrep random splits of the data into training and test sets")

param_combinations_split = split.data.frame(param_combinations,
seq_len(nrow(param_combinations)))

# for (ii in seq_len(nrow(param_combinations))) {

res_scores_split = BiocParallel::bplapply(

param_combinations_split,

function(x) {

slices_ii = x$slices
alpha_ii = x$alpha
rep_ii = x$rep

data_split_ii = split_list[[rep_ii]]

# this call to mapply will always be over the nrep iterations
# if (FALSE) {
# cv_scores <- mapply(function(X_train, coords_train, X_test,
# coords_test, samples_train){
# wSIR:::wSIROptimisation(exprs_train = as.matrix(X_train),
# coords_train = coords_train,
# exprs_test = as.matrix(X_test),
# coords_test = coords_test,
# samples_train = samples_train,
# # param_combinations$slices[ii],
# # param_combinations$alpha[ii],
# slices = slices_ii,
# alpha = alpha_ii,
# evalmetrics = metric,
# ...)
# }, result[[1]], result[[2]], result[[3]], result[[4]], result[[5]])
# }
cv_score = wSIROptimisation(exprs_train = as.matrix(data_split_ii[[1]]),
coords_train = data_split_ii[[2]],
exprs_test = as.matrix(data_split_ii[[3]]),
coords_test = data_split_ii[[4]],
samples_train = data_split_ii[[5]],
# param_combinations$slices[ii],
# param_combinations$alpha[ii],
slices = slices_ii,
alpha = alpha_ii,
evalmetrics = metric,
# evalmetrics = "DC",
...
)

# return('hello')

# param_combinations$metric[ii] <- mean(cv_scores, na.rm = TRUE)
# param_combinations$metric_sd[ii] <- sd(cv_scores, na.rm = TRUE)


return(data.frame(
slices = slices_ii,
alpha = alpha_ii,
rep = rep_ii,
metric = cv_score
))

# if (FALSE) {
# return(data.frame(
# metric = mean(cv_scores, na.rm = TRUE),
# metric_sd = sd(cv_scores, na.rm = TRUE)
# ))
# }

},
BPPARAM = BPPARAM)

if (verbose) message("completed runs of wSIR and metric calculation")

res_scores <- do.call(rbind, res_scores_split)
# param_combinations <- cbind(param_combinations, res_scores)

# param_combinations <- res_scores %>%
# dplyr::group_by(slices,alpha) %>%
# dplyr::mutate(metric = mean(metric),
# metric_sd = sd(metric))

param_combinations <- do.call(
rbind,
lapply(split.data.frame(res_scores,
interaction(res_scores$slices, res_scores$alpha)),
colMeans))

# return(param_combinations)
# }

res_df <- param_combinations
best_metric_index <- which.max(res_df[, "metric"])
best_alpha <- res_df[best_metric_index, "alpha"]
best_slices <- res_df[best_metric_index, "slices"]

message <- paste0("Optimal (alpha, slices) pair: (",
best_alpha, ", ", best_slices, ")")
message_value <- paste0("Optimal (alpha, slices) pair: (",
best_alpha, ", ", best_slices, ")")
if (verbose) message(message_value)

plot <- ggplot2::ggplot(res_df, mapping = aes(
x = .data$alpha, y = .data$slices, size = .data$metric)) +
ggplot2::geom_point() +
ggplot2::theme_classic() +
ggplot2::ggtitle(
paste0("Metric value for different parameter combinations (",
nrep, " iterations of train/test split)"))
if (plot) {
plot <- ggplot2::ggplot(res_df, mapping = aes(
x = .data$alpha, y = .data$slices, size = .data$metric)) +
ggplot2::geom_point() +
ggplot2::theme_classic() +
ggplot2::ggtitle(
paste0("Metric value for different parameter combinations (",
nrep, " iterations of train/test split)"))
} else {
plot = NULL
}

return(list(plot = plot,
message = message,
message = message_value,
best_alpha = best_alpha,
best_slices = best_slices,
results_dataframe = res_df))
Expand Down
Loading

0 comments on commit a737ef5

Please sign in to comment.