From a737ef52d3a79f359ef99b1a1cef9695e34957de Mon Sep 17 00:00:00 2001 From: shazanfar Date: Fri, 27 Sep 2024 22:41:19 +0800 Subject: [PATCH] add interop --- DESCRIPTION | 6 +- NAMESPACE | 11 +++ R/calculatewSIR.R | 99 +++++++++++++++++++++ R/exploreWSIRParams.R | 167 ++++++++++++++++++++++++++++-------- R/runwSIR.R | 75 ++++++++++++++++ R/utils.R | 33 +++++++ R/wSIR.R | 10 ++- R/wSIROptimisation.R | 4 - man/calculatewSIR.Rd | 53 ++++++++++++ man/exploreWSIRParams.Rd | 27 +++--- man/runwSIR.Rd | 55 ++++++++++++ man/sirCategorical.Rd | 6 +- man/wSIR.Rd | 12 +-- man/wSIROptimisation.Rd | 4 - src/wSIR.so | Bin 183560 -> 183560 bytes vignettes/wSIR_vignette.Rmd | 11 ++- 16 files changed, 502 insertions(+), 71 deletions(-) create mode 100644 R/calculatewSIR.R create mode 100644 R/runwSIR.R create mode 100644 R/utils.R create mode 100644 man/calculatewSIR.Rd create mode 100644 man/runwSIR.Rd diff --git a/DESCRIPTION b/DESCRIPTION index bd9fa28..9585577 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -49,7 +49,11 @@ Imports: Rfast, doBy, BiocParallel, rlang, - methods + methods, + BiocGenerics, + SummarizedExperiment, + SingleCellExperiment, + SpatialExperiment Suggests: knitr, BiocStyle, class diff --git a/NAMESPACE b/NAMESPACE index f78d4d9..5653716 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,16 +1,26 @@ # Generated by roxygen2: do not edit by hand +export(calculatewSIR) export(exploreWSIRParams) export(findTopGenes) export(generateUmapFromWSIR) export(plotUmapFromWSIR) export(projectWSIR) +export(runwSIR) export(visualiseWSIRDirections) export(wSIR) +importFrom(BiocGenerics,t) importFrom(BiocParallel,MulticoreParam) +importFrom(BiocParallel,SerialParam) importFrom(BiocParallel,bplapply) importFrom(Rcpp,evalCpp) importFrom(Rfast,dcor) +importFrom(SingleCellExperiment,reducedDim) +importFrom(SingleCellExperiment,reducedDims) +importFrom(SpatialExperiment,spatialCoords) +importFrom(SummarizedExperiment,assay) +importFrom(SummarizedExperiment,assays) 
+importFrom(SummarizedExperiment,colData) importFrom(distances,distances) importFrom(doBy,which.maxn) importFrom(ggplot2,aes) @@ -25,6 +35,7 @@ importFrom(ggplot2,scale_color_gradient) importFrom(ggplot2,theme_classic) importFrom(ggplot2,theme_minimal) importFrom(magrittr,"%>%") +importFrom(methods,as) importFrom(methods,is) importFrom(rlang,.data) importFrom(stats,cor) diff --git a/R/calculatewSIR.R b/R/calculatewSIR.R new file mode 100644 index 0000000..2c0e4d9 --- /dev/null +++ b/R/calculatewSIR.R @@ -0,0 +1,99 @@ +#' calculatewSIR +#' +#' @description +#' Perform wSIR on cells, based on the expression data and a reducedDim in a +#' SingleCellExperiment or SpatialExperiment object +#' +#' @param x A numeric matrix of normalised gene expression data where rows are +#' features and columns are cells. Alternatively, a SingleCellExperiment or +#' SpatialExperiment containing such a matrix +#' @param assay.type if `x` is a SingleCellExperiment of SpatialExperiment then +#' this is the assay for which wSIR will be calculated. Default "logcounts". +#' @param dimred String or integer scalar specifying the dimensionality reduction +#' slot for which to use for the slicing mechanism. Ignored if `coords` given. +#' @param colData_columns character vector specifying the subset of colData +#' columns to be used for the wSIR slicing mechanism. Ignored if `coords` or +#' `dimred` given +#' @param spatialCoords logical indicating if spatialCoords should be used for +#' the wSIR slicing mechanism. Ignored if `coords`, `dimred`, or +#' `colData_columns` given, or if `x` is not a SpatialExperiment object. +#' @param ... 
arguments passing to `wSIR` +#' +#' @return A wSIR object +#' +#' @importFrom SummarizedExperiment assay assays colData +#' @importFrom SingleCellExperiment reducedDim reducedDims +#' @importFrom SpatialExperiment spatialCoords +#' @importFrom methods is as +#' @importFrom BiocGenerics t +#' +#' @examples +#' data(MouseData) +#' library(SingleCellExperiment) +#' sce = SingleCellExperiment(assays = list(logcounts = t(sample1_exprs)), +#' reducedDims = list(spatial = sample1_coords)) +#' +#' obj = calculatewSIR(x = sce, +#' dimred = "spatial") +#' +#' @export +calculatewSIR <- function(x, + assay.type = "logcounts", + dimred = NULL, + colData_columns = NULL, + spatialCoords = FALSE, + ...) { + + isMatLike <- methods::is(x, "matrix") + + if (isMatLike) { + + wsir_obj <- wSIR(X = x, ...) + + return(wsir_obj) + + } + + if (!assay.type %in% names(SummarizedExperiment::assays(x))) { + stop("assay.type not within assays of x") + } + + X <- SummarizedExperiment::assay(x, assay.type) + coords <- NULL # stays NULL when no coordinate source is requested, so wSIR() reports it + if (!is.null(dimred)) { + + if (!dimred %in% names(SingleCellExperiment::reducedDims(x))) { + stop("dimred not within reducedDims of x") + } + + coords <- SingleCellExperiment::reducedDim(x, dimred) + + } else { + + if (!is.null(colData_columns)) { + + if (!all(colData_columns %in% colnames(SummarizedExperiment::colData(x)))) { + stop("not all colData_columns names are in colnames(colData(x))") + } + + coords <- SummarizedExperiment::colData(x)[,colData_columns, drop = FALSE] + coords <- methods::as(coords, "data.frame") + + } else { + + if (isTRUE(spatialCoords)) { + + coords <- SpatialExperiment::spatialCoords(x) + coords <- base::as.data.frame(coords) + colnames(coords)[seq_len(2)] <- c("x", "y") + + } + + } + + } + + wsir_obj <- wSIR(X = BiocGenerics::t(X), coords = coords, ...) + + return(wsir_obj) +} diff --git a/R/exploreWSIRParams.R b/R/exploreWSIRParams.R index a21dfb4..0d3f440 100644 --- a/R/exploreWSIRParams.R +++ b/R/exploreWSIRParams.R @@ -39,9 +39,12 @@ #' embedding.
Default is "DC". #' @param nrep integer for the number of train/test splits of the data to #' perform. -#' @param param parallel computing setup for bplapply from BiocParallel +#' @param nCores number of cores used to build the parallel computing setup for bplapply from BiocParallel #' package. Default is to use a single core, hence -#' default value is MulticoreParam(workers = 1) +#' default value is 1 +#' @param plot logical whether a dotplot of parameters and metrics should be +#' produced, default TRUE +#' @param verbose logical whether progress messages should be printed, default TRUE #' @param ... arguments passed on to wSIROptimisation #' #' @return List with five slots, named "plot", "message", "best_alpha", @@ -87,7 +90,7 @@ #' @importFrom magrittr %>% #' @importFrom ggplot2 ggplot aes geom_point theme_classic ggtitle #' @importFrom vctrs vec_rep_each -#' @importFrom BiocParallel bplapply MulticoreParam +#' @importFrom BiocParallel SerialParam SnowParam MulticoreParam bpparam bplapply #' @importFrom stringr word #' #' @export @@ -98,14 +101,21 @@ exploreWSIRParams <- function(X, optim_slices = c(5,10,15,20), metric = "DC", nrep = 5, - param = MulticoreParam(workers = 1), + # BPPARAM = SerialParam(), + nCores = 1, + plot = TRUE, + verbose = TRUE, ...
) { - # vector of all parameter combinations - param_combinations <- expand.grid(slices = optim_slices, alpha = optim_alpha, - metric = NA) + BPPARAM <- .generateBPParam(cores = nCores) + # vector of all parameter combinations + param_combinations <- expand.grid(slices = optim_slices, + alpha = optim_alpha, + rep = seq_len(nrep) + # metric = NA) + ) # # perform bplapply over that list of (pairs of) combinations # metric_vals_list = bplapply(param_combinations, function(parameter_pair) { @@ -130,52 +140,139 @@ exploreWSIRParams <- function(X, nrow = nrow(X), ncol = nrep ) # create training and test set from each column index - split_list <- apply(index_rep, 2, function(keep){ + split_list <- apply(index_rep, 2, function(keep) { X_train <- X[keep,] coords_train <- coords[keep,] samples_train <- samples[keep] X_test <- X[!keep,] coords_test <- coords[!keep,] - list(X_train, coords_train, X_test, - coords_test, samples_train ) + list(X_train, + coords_train, + X_test, + coords_test, + samples_train) }) - nElements <- 5 + # nElements <- 5 # not sure why this is 5 + nElements <- length(split_list[[1]]) # maybe it is this? result <- lapply(seq_len(nElements), function(i) lapply(split_list, "[[", i)) + # the above is like a list version of transpose - for (ii in seq_len(nrow(param_combinations))) { - cv_scores <- mapply(function(X_train, coords_train, X_test, - coords_test, samples_train){ - wSIROptimisation(as.matrix(X_train), - coords_train, as.matrix(X_test), - coords_test, - samples_train, param_combinations$slices[ii], - param_combinations$alpha[ii], - evalmetrics = metric, - ...) 
- }, result[[1]], result[[2]], result[[3]], result[[4]], result[[5]]) - - param_combinations$metric[ii] <- mean(cv_scores, na.rm = TRUE) - } + if (verbose) message("set up nrep random splits of the data into training and test sets") + + param_combinations_split = split.data.frame(param_combinations, + seq_len(nrow(param_combinations))) + + # for (ii in seq_len(nrow(param_combinations))) { + + res_scores_split = BiocParallel::bplapply( + + param_combinations_split, + + function(x) { + + slices_ii = x$slices + alpha_ii = x$alpha + rep_ii = x$rep + + data_split_ii = split_list[[rep_ii]] + + # this call to mapply will always be over the nrep iterations + # if (FALSE) { + # cv_scores <- mapply(function(X_train, coords_train, X_test, + # coords_test, samples_train){ + # wSIR:::wSIROptimisation(exprs_train = as.matrix(X_train), + # coords_train = coords_train, + # exprs_test = as.matrix(X_test), + # coords_test = coords_test, + # samples_train = samples_train, + # # param_combinations$slices[ii], + # # param_combinations$alpha[ii], + # slices = slices_ii, + # alpha = alpha_ii, + # evalmetrics = metric, + # ...) + # }, result[[1]], result[[2]], result[[3]], result[[4]], result[[5]]) + # } + cv_score = wSIROptimisation(exprs_train = as.matrix(data_split_ii[[1]]), + coords_train = data_split_ii[[2]], + exprs_test = as.matrix(data_split_ii[[3]]), + coords_test = data_split_ii[[4]], + samples_train = data_split_ii[[5]], + # param_combinations$slices[ii], + # param_combinations$alpha[ii], + slices = slices_ii, + alpha = alpha_ii, + evalmetrics = metric, + # evalmetrics = "DC", + ... 
+ ) + + # return('hello') + + # param_combinations$metric[ii] <- mean(cv_scores, na.rm = TRUE) + # param_combinations$metric_sd[ii] <- sd(cv_scores, na.rm = TRUE) + + + return(data.frame( + slices = slices_ii, + alpha = alpha_ii, + rep = rep_ii, + metric = cv_score + )) + + # if (FALSE) { + # return(data.frame( + # metric = mean(cv_scores, na.rm = TRUE), + # metric_sd = sd(cv_scores, na.rm = TRUE) + # )) + # } + + }, + BPPARAM = BPPARAM) + + if (verbose) message("completed runs of wSIR and metric calculation") + + res_scores <- do.call(rbind, res_scores_split) + # param_combinations <- cbind(param_combinations, res_scores) + + # param_combinations <- res_scores %>% + # dplyr::group_by(slices,alpha) %>% + # dplyr::mutate(metric = mean(metric), + # metric_sd = sd(metric)) + + param_combinations <- as.data.frame(do.call( + rbind, + lapply(split.data.frame(res_scores, + interaction(res_scores$slices, res_scores$alpha)), + colMeans, na.rm = TRUE))) + + # return(param_combinations) + # } res_df <- param_combinations best_metric_index <- which.max(res_df[, "metric"]) best_alpha <- res_df[best_metric_index, "alpha"] best_slices <- res_df[best_metric_index, "slices"] - message <- paste0("Optimal (alpha, slices) pair: (", - best_alpha, ", ", best_slices, ")") + message_value <- paste0("Optimal (alpha, slices) pair: (", + best_alpha, ", ", best_slices, ")") + if (verbose) message(message_value) - plot <- ggplot2::ggplot(res_df, mapping = aes( - x = .data$alpha, y = .data$slices, size = .data$metric)) + - ggplot2::geom_point() + - ggplot2::theme_classic() + - ggplot2::ggtitle( - paste0("Metric value for different parameter combinations (", - nrep, " iterations of train/test split)")) + if (plot) { + plot <- ggplot2::ggplot(res_df, mapping = aes( + x = .data$alpha, y = .data$slices, size = .data$metric)) + + ggplot2::geom_point() + + ggplot2::theme_classic() + + ggplot2::ggtitle( + paste0("Metric value for different parameter combinations (", + nrep, " iterations of train/test split)")) + } else { + plot =
NULL + } return(list(plot = plot, - message = message, + message = message_value, best_alpha = best_alpha, best_slices = best_slices, results_dataframe = res_df)) diff --git a/R/runwSIR.R b/R/runwSIR.R new file mode 100644 index 0000000..6c36e6c --- /dev/null +++ b/R/runwSIR.R @@ -0,0 +1,75 @@ +#' runwSIR +#' +#' @description +#' Perform wSIR on cells, based on the expression data and a reducedDim in a +#' SingleCellExperiment or SpatialExperiment object +#' +#' @param x A numeric matrix of normalised gene expression data where rows are +#' features and columns are cells. Alternatively, a SingleCellExperiment or +#' SpatialExperiment containing such a matrix +#' @param name string to specify the name to store the result in the reducedDims +#' of the output. Default is "wSIR" +#' @param scores_only logical whether only the wSIR scores should be calculated. +#' If FALSE additional information about the wSIR model will be stored in the +#' attributes of the object. Default FALSE. +#' @param ... arguments passing to `calculatewSIR` +#' +#' @return If `x` is matrix-like, a list containing wSIR scores, loadings, etc. +#' If `x` is a SingleCellExperiment or SpatialExperiment, the same object is +#' returned with an additional slot in `reducedDims(..., name)` corresponding +#' to the wSIR scores matrix. 
If `scores_only = FALSE`, then the attributes of +#' the wSIR scores contain the following elements: +#' - directions +#' - estd +#' - W +#' - evalues +#' +#' @importFrom methods is +#' @importFrom SingleCellExperiment reducedDim +#' +#' @examples +#' data(MouseData) +#' library(SingleCellExperiment) +#' library(SpatialExperiment) +#' +#' sce = SingleCellExperiment(assays = list(logcounts = t(sample1_exprs)), +#' reducedDims = list(spatial = sample1_coords)) +#' +#' sce = runwSIR(x = sce, dimred = "spatial") +#' +#' spe = SpatialExperiment(assays = list(logcounts = t(sample1_exprs)), +#' spatialCoords = as.matrix(sample1_coords)) +#' +#' spe = runwSIR(x = spe, spatialCoords = TRUE) +#' +#' @export +runwSIR <- function(x, + name = "wSIR", + scores_only = FALSE, + ...) { + + isMatLike <- methods::is(x, "matrix") + + wsir_obj <- calculatewSIR(x = x, ...) + + if (isMatLike) { + + return(wsir_obj) + + } + + dr <- wsir_obj$scores + + if (!scores_only) { + + base::attr(dr, "directions") <- wsir_obj$directions + base::attr(dr, "estd") <- wsir_obj$estd + base::attr(dr, "W") <- wsir_obj$W + base::attr(dr, "evalues") <- wsir_obj$evalues + + } + + SingleCellExperiment::reducedDim(x, name) <- dr + + return(x) +} diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..654a68c --- /dev/null +++ b/R/utils.R @@ -0,0 +1,33 @@ +# ------------------------------------------------------------------------------ +#' Utility function to generate BPPARAM object. +#' +#' @param cores Desired number of cores for BPPARAM object. +#' @return A BPPARAM object. +#' @importFrom BiocParallel SerialParam SnowParam MulticoreParam bpparam +.generateBPParam <- function(cores = 1) { + seed <- sample.int(.Machine$integer.max, 1L) + if (cores == 1) { + BPparam <- BiocParallel::SerialParam(RNGseed = seed) + } else { ## Parallel processing is desired. + ## Also set the BPparam RNGseed if the user ran set.seed(someNumber) themselves. + if (Sys.info()["sysname"] == "Windows") { # Only SnowParam suits Windows.
+ BPparam <- BiocParallel::SnowParam(min( + cores, + BiocParallel::snowWorkers("SOCK") + ), + RNGseed = seed + ) + } else if (Sys.info()["sysname"] %in% c("Darwin", "Linux")) { + BPparam <- BiocParallel::MulticoreParam(min( + cores, + BiocParallel::multicoreWorkers() + ), + RNGseed = seed + ) + ## Multicore is faster than SNOW, but it doesn't work on Windows. + } else { ## Something weird. + BPparam <- BiocParallel::bpparam() ## BiocParallel will figure it out. + } + } + BPparam +} diff --git a/R/wSIR.R b/R/wSIR.R index 2c71b8a..ccf4d5b 100644 --- a/R/wSIR.R +++ b/R/wSIR.R @@ -21,13 +21,13 @@ #' your specific task and evaluation metric. In that case, determine your #' optimal slices and alpha values and then use #' them in the relevant function, then setting optim_params = FALSE. -#' @param alpha_vals If you have optim_params = TRUE, then this is the +#' @param optim_alpha If you have optim_params = TRUE, then this is the #' values of alpha to optimise over in wSIR. 0 #' gives Sliced Inverse Regression (SIR) implementation, and larger values #' represent stronger spatial correlation. #' Suggest to use integers for interpretability, but can use non-integers. #' Values must be non-negative. -#' @param slice_vals If you have optim_params = TRUE, then this is the values +#' @param optim_slices If you have optim_params = TRUE, then this is the values #' of slices to optimise over in wSIR. #' Suggest maximum value in the vector to be no more than around #' \eqn{\sqrt{n/20}}, as this upper bound ensures an @@ -82,6 +82,12 @@ wSIR <- function(X, verbose = FALSE, ...) { + # browser() + + if (is.null(coords)) { + stop("coords must be provided") + } + if (optim_params) { if (verbose) message("Optimising parameters...") optim_obj <- exploreWSIRParams(X = X, diff --git a/R/wSIROptimisation.R b/R/wSIROptimisation.R index 496ac60..e0116a1 100644 --- a/R/wSIROptimisation.R +++ b/R/wSIROptimisation.R @@ -35,8 +35,6 @@ #' inversely proportional to the physical distance between them.
Suggest #' to tune this #' parameter using exploreWSIRParams() function. -#' @param maxDirections integer for the maximum number of directions to -#' include in the low-dimenensional embedding. Default is 50. #' @param evalmetrics evaluation metrics to use for parameter tuning. #' String, options are any or all of: "DC" to use distance #' correlation; "CD" to use correlation of distances; "ncol" to use number @@ -60,7 +58,6 @@ wSIROptimisation <- function(exprs_train, samples_train, slices, alpha, - maxDirections, evalmetrics = c("CD","DC","ncol"), ...) { @@ -70,7 +67,6 @@ wSIROptimisation <- function(exprs_train, samples = samples_train, slices = slices, alpha = alpha, - maxDirections = maxDirections, ...) projected_test <- projectWSIR(wsir = wsir_obj, newdata = exprs_test) diff --git a/man/calculatewSIR.Rd b/man/calculatewSIR.Rd new file mode 100644 index 0000000..9d81fc6 --- /dev/null +++ b/man/calculatewSIR.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/calculatewSIR.R +\name{calculatewSIR} +\alias{calculatewSIR} +\title{calculatewSIR} +\usage{ +calculatewSIR( + x, + assay.type = "logcounts", + dimred = NULL, + colData_columns = NULL, + spatialCoords = FALSE, + ... +) +} +\arguments{ +\item{x}{A numeric matrix of normalised gene expression data where rows are +features and columns are cells. Alternatively, a SingleCellExperiment or +SpatialExperiment containing such a matrix} + +\item{assay.type}{if \code{x} is a SingleCellExperiment of SpatialExperiment then +this is the assay for which wSIR will be calculated. Default "logcounts".} + +\item{dimred}{String or integer scalar specifying the dimensionality reduction +slot for which to use for the slicing mechanism. Ignored if \code{coords} given.} + +\item{colData_columns}{character vector specifying the subset of colData +columns to be used for the wSIR slicing mechanism. 
Ignored if \code{coords} or +\code{dimred} given} + +\item{spatialCoords}{logical indicating if spatialCoords should be used for +the wSIR slicing mechanism. Ignored if \code{coords}, \code{dimred}, or +\code{colData_columns} given, or if \code{x} is not a SpatialExperiment object.} + +\item{...}{arguments passing to \code{wSIR}} +} +\value{ +A wSIR object +} +\description{ +Perform wSIR on cells, based on the expression data and a reducedDim in a +SingleCellExperiment or SpatialExperiment object +} +\examples{ +data(MouseData) +library(SingleCellExperiment) +sce = SingleCellExperiment(assays = list(logcounts = t(sample1_exprs)), +reducedDims = list(spatial = sample1_coords)) + +obj = calculatewSIR(x = sce, + dimred = "spatial") + +} diff --git a/man/exploreWSIRParams.Rd b/man/exploreWSIRParams.Rd index ac04958..b2ff8b9 100644 --- a/man/exploreWSIRParams.Rd +++ b/man/exploreWSIRParams.Rd @@ -8,11 +8,13 @@ exploreWSIRParams( X, coords, samples = rep(1, nrow(coords)), - alpha_vals = c(0, 2, 4, 10), - slice_vals = c(5, 10, 15, 20), + optim_alpha = c(0, 2, 4, 10), + optim_slices = c(5, 10, 15, 20), metric = "DC", nrep = 5, - param = MulticoreParam(workers = 1), + BPPARAM = SerialParam(), + plot = TRUE, + verbose = TRUE, ... ) } @@ -34,13 +36,13 @@ format: for the previous example, you could write samples = c(rep("sample 1", 5000), rep("sample 2", 5000)), and the result would be the same.} -\item{alpha_vals}{vector of numbers as the values of parameter alpha to use +\item{optim_alpha}{vector of numbers as the values of parameter alpha to use in WSIR. 0 gives Sliced Inverse Regression (SIR) implementation, and larger values represent stronger spatial correlation. Suggest to use integers for interpretability, but can use non-integers. Values must be non-negative.} -\item{slice_vals}{vector of integers as the values of parameter slices to +\item{optim_slices}{vector of integers as the values of parameter slices to use in WSIR. 
Suggest maximum value in the vector to be no more than around \eqn{\sqrt{n/20}}, as this upper bound ensures an average of at least 10 cells per tile in the training set.} @@ -54,9 +56,14 @@ embedding. Default is "DC".} \item{nrep}{integer for the number of train/test splits of the data to perform.} -\item{param}{parallel computing setup for bplapply from BiocParallel +\item{BPPARAM}{parallel computing setup for bplapply from BiocParallel package. Default is to use a single core, hence -default value is MulticoreParam(workers = 1)} +default value is SerialParam()} + +\item{plot}{logical whether a dotplot of parameters and metrics should be +produced, default TRUE} + +\item{verbose}{default TRUE} \item{...}{arguments passed on to wSIROptimisation} } @@ -76,7 +83,7 @@ values that were tested according to selected metric. \item "best_slices" returns the integer for the best slices value among the values that were tested according to selected metric. \item "results_dataframe" returns the results dataframe used to create "plot". -This dataframe has length(alpha_vals)*length(slice_vals) rows, +This dataframe has length(optim_alpha)*length(optim_slices) rows, where one is for each combination of parameters slices and alpha. There is one column for "alpha", one for "slices" and one for each of the evaluation metrics selected in "metrics" argument. Column @@ -100,8 +107,8 @@ This pair of slices and alpha can be used for your downstream tasks. 
data(MouseData) explore_params = exploreWSIRParams(X = sample1_exprs, coords = sample1_coords, - alpha_vals = c(0,2,4,8), - slice_vals = c(3,6,10)) + optim_alpha = c(0,2,4,8), + optim_slices = c(3,6,10)) explore_params$plot explore_params$message best_alpha = explore_params$best_alpha diff --git a/man/runwSIR.Rd b/man/runwSIR.Rd new file mode 100644 index 0000000..1e25bfe --- /dev/null +++ b/man/runwSIR.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/runwSIR.R +\name{runwSIR} +\alias{runwSIR} +\title{runwSIR} +\usage{ +runwSIR(x, name = "wSIR", scores_only = FALSE, ...) +} +\arguments{ +\item{x}{A numeric matrix of normalised gene expression data where rows are +features and columns are cells. Alternatively, a SingleCellExperiment or +SpatialExperiment containing such a matrix} + +\item{name}{string to specify the name to store the result in the reducedDims +of the output. Default is "wSIR"} + +\item{scores_only}{logical whether only the wSIR scores should be calculated. +If FALSE additional information about the wSIR model will be stored in the +attributes of the object. Default FALSE.} + +\item{...}{arguments passing to \code{calculatewSIR}} +} +\value{ +If \code{x} is matrix-like, a list containing wSIR scores, loadings, etc. +If \code{x} is a SingleCellExperiment or SpatialExperiment, the same object is +returned with an additional slot in \code{reducedDims(..., name)} corresponding +to the wSIR scores matrix. 
If \code{scores_only = FALSE}, then the attributes of +the wSIR scores contain the following elements: +\itemize{ +\item directions +\item estd +\item W +\item evalues +} +} +\description{ +Perform wSIR on cells, based on the expression data and a reducedDim in a +SingleCellExperiment or SpatialExperiment object +} +\examples{ +data(MouseData) +library(SingleCellExperiment) +library(SpatialExperiment) + +sce = SingleCellExperiment(assays = list(logcounts = t(sample1_exprs)), +reducedDims = list(spatial = sample1_coords)) + +sce = runwSIR(x = sce, dimred = "spatial") + +spe = SpatialExperiment(assays = list(logcounts = t(sample1_exprs)), +spatialCoords = as.matrix(sample1_coords)) + +spe = runwSIR(x = spe, spatialCoords = TRUE) + +} diff --git a/man/sirCategorical.Rd b/man/sirCategorical.Rd index c10cd94..27bafba 100644 --- a/man/sirCategorical.Rd +++ b/man/sirCategorical.Rd @@ -4,7 +4,7 @@ \alias{sirCategorical} \title{sirCategorical} \usage{ -sirCategorical(X, Y, maxDirections = 50, W = NULL, ...) +sirCategorical(X, Y, W = NULL, ...) } \arguments{ \item{X}{matrix of normalised gene expression data for n genes across p @@ -14,10 +14,6 @@ cells.} allocation for each cell. There should be up to slices^2 unique tile IDs in this column.} -\item{maxDirections}{Integer to specify maximum number of directions to -retain in thee low-dimensional embedding -of the data. Use if you need at most a certain number for a downstream task.} - \item{W}{Weight matrix created by createWeightMatrix. Entry (i,j) represents the spatial correlation level between tiles i and j. The diagonal values should be all 1. 
If not diff --git a/man/wSIR.Rd b/man/wSIR.Rd index b00bdf2..a90d6c6 100644 --- a/man/wSIR.Rd +++ b/man/wSIR.Rd @@ -8,8 +8,8 @@ wSIR( X, coords, optim_params = FALSE, - alpha_vals = c(0, 1, 2, 4, 8, 12), - slice_vals = c(3, 5, 7, 10, 15, 20), + optim_alpha = c(0, 1, 2, 4, 8, 12), + optim_slices = c(3, 5, 7, 10, 15, 20), metric = "DC", nrep = 5, verbose = FALSE, @@ -33,14 +33,14 @@ your specific task and evaluation metric. In that case, determine your optimal slices and alpha values and then use them in the relevant function, then setting optim_params = FALSE.} -\item{alpha_vals}{If you have optim_params = TRUE, then this is the +\item{optim_alpha}{If you have optim_params = TRUE, then this is the values of alpha to optimise over in wSIR. 0 gives Sliced Inverse Regression (SIR) implementation, and larger values represent stronger spatial correlation. Suggest to use integers for interpretability, but can use non-integers. Values must be non-negative.} -\item{slice_vals}{If you have optim_params = TRUE, then this is the values +\item{optim_slices}{If you have optim_params = TRUE, then this is the values of slices to optimise over in wSIR. Suggest maximum value in the vector to be no more than around \eqn{\sqrt{n/20}}, as this upper bound ensures an @@ -91,8 +91,8 @@ data(MouseData) wsir_obj = wSIR(X = sample1_exprs, coords = sample1_coords, optim_params = TRUE, - alpha_vals = c(0,2,4), - slice_vals = c(3,6,10), + optim_alpha = c(0,2,4), + optim_slices = c(3,6,10), metric = "DC", nrep = 1) # create wsir object diff --git a/man/wSIROptimisation.Rd b/man/wSIROptimisation.Rd index cfbdf4c..85022b8 100644 --- a/man/wSIROptimisation.Rd +++ b/man/wSIROptimisation.Rd @@ -12,7 +12,6 @@ wSIROptimisation( samples_train, slices, alpha, - maxDirections, evalmetrics = c("CD", "DC", "ncol"), ... ) @@ -52,9 +51,6 @@ inversely proportional to the physical distance between them. 
Suggest to tune this parameter using exploreWSIRParams() function.} -\item{maxDirections}{integer for the maximum number of directions to -include in the low-dimenensional embedding. Default is 50.} - \item{evalmetrics}{evaluation metrics to use for parameter tuning. String, options are any or all of: "DC" to use distance correlation; "CD" to use correlation of distances; "ncol" to use number diff --git a/src/wSIR.so b/src/wSIR.so index 1e42fe7af4ca86604ab52f961d1adfafe02d3e63..096b9439fcbf5c94fbdc298e63ad760b9d2e79c8 100755 GIT binary patch delta 174 zcmV;f08#&lnhS`U3$S1Y5FxATk(h}yV6$dq2)Ce*m|NplQlrcBZ`qTGC$x{0R|dAh!y$z${aPI?AF`2gvSCv z1R(SvL?&1BHi8y9zt80KLeIMVwBUN`)T1A_Z4x1_BhHuP%K{)E0hGw3rLclYuV&aHC`l7K3mGhj0b~w{Qjm2rdJFZ}PW$ zGy?q`1A=ezw|h|n>2)BX^W*H@pF9_y8MsA2X>A5?HHl$EIo(hgkQ^noE-DVUgvSCv z1Ry-M=($ft<#KK2s>Q8Y$IldPUvy^t32J8JHk{E_Q(~9o%K{)EgIGV8(C-gHf+g-= cNP_E)26&IZ)8Mbf=gsR3^E~=wmn6&r