From 8418e364b877842d79d3b6c158808457941f2153 Mon Sep 17 00:00:00 2001 From: shazanfar Date: Thu, 19 Sep 2024 12:54:24 +1000 Subject: [PATCH] changes for check and bioccheck partway --- DESCRIPTION | 28 +- NAMESPACE | 2 + R/exploreWSIRParams.R | 54 +- R/findTopGenes.R | 17 +- R/generateUmapFromWSIR.R | 2 +- R/plotUmapFromWSIR.R | 21 +- R/sirCategorical.R | 8 +- R/sirPCA.R | 20 +- R/visualiseWSIRDirections.R | 24 +- R/wSIR.R | 63 +- R/wSIROptimisation.R | 21 +- R/wSIRSpecifiedParams.R | 12 +- man/exploreWSIRParams.Rd | 20 +- man/generateUmapFromWSIR.Rd | 2 +- man/plotUmapFromWSIR.Rd | 10 +- man/sirCategorical.Rd | 7 +- man/sirPCA.Rd | 16 +- man/visualiseWSIRDirections.Rd | 3 +- man/wSIR.Rd | 49 +- man/wSIROptimisation.Rd | 11 +- man/wSIRSpecifiedParams.Rd | 12 +- src/wSIR.so | Bin 183560 -> 183560 bytes vignettes/wSIR_vignette.Rmd | 4 +- vignettes/wSIR_vignette.html | 2373 -------------------------------- 24 files changed, 190 insertions(+), 2589 deletions(-) delete mode 100644 vignettes/wSIR_vignette.html diff --git a/DESCRIPTION b/DESCRIPTION index f34bfe3..0225faf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,9 @@ Package: wSIR Type: Package -Title: Weighted Sliced Inverse Regression (WSIR) for supervised +Title: Weighted Sliced Inverse Regression (wSIR) for supervised dimension reduction of spatial transcriptomics and single cell gene expression data -Version: 0.1.2 +Version: 0.1.3 Authors@R: c( person("Max", "Woollard", , "mwoo5086@uni.sydney.edu.au", role = c("aut", "cre", "ctb")), person("Pratibha", "Panwar", role = c("ctb")), @@ -21,17 +21,29 @@ Description: Weighted Sliced Inverse Regression (wSIR) is a supervised dimension information present in the gene expression data. License: GPL-2 Encoding: UTF-8 -URL: https://sydneybiox.github.io/wSIR, - https://sydneybiox.github.io/wSIR/ +URL: https://sydneybiox.github.io/wSIR BugReports: https://github.com/sydneybiox/wSIR/issues biocViews: DimensionReduction, Software LazyData: true Roxygen: list(markdown = TRUE) Depends: R (>= 4.3.0), -Imports: Rfast, magrittr, ggplot2, umap, vctrs, class, stringr, - distances, BiocStyle, Rcpp (>= 1.0.11), - RcppArmadillo, knitr, doBy, BiocParallel -LinkingTo: Rcpp, RcppArmadillo +Imports: Rfast, + magrittr, + ggplot2, + umap, + vctrs, + stringr, + distances, + Rcpp (>= 1.0.11), + doBy, + BiocParallel, + rlang, + methods +Suggests: knitr, + BiocStyle, + class +LinkingTo: Rcpp, + RcppArmadillo VignetteBuilder: knitr RoxygenNote: 7.3.2 NeedsCompilation: yes diff --git a/NAMESPACE b/NAMESPACE index cfea772..05e0519 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,8 @@ importFrom(ggplot2,scale_color_gradient) importFrom(ggplot2,theme_classic) importFrom(ggplot2,theme_minimal) importFrom(magrittr,"%>%") +importFrom(methods,is) +importFrom(rlang,.data) importFrom(stats,cor) importFrom(stats,reorder) importFrom(stringr,word) diff --git a/R/exploreWSIRParams.R b/R/exploreWSIRParams.R index 28a751f..1eef222 100644 --- a/R/exploreWSIRParams.R +++ b/R/exploreWSIRParams.R @@ -6,7 +6,7 @@ #' it will visualise the performance of WSIR with varying function parameters based on your data, and return the optimal pair. #' This pair of slices and alpha can be used for your downstream tasks. #' -#' @param exprs matrix containing normalised gene expression data including n cells and p genes, dimension n * p. +#' @param X matrix containing normalised gene expression data including n cells and p genes, dimension n * p. #' @param coords dataframe containing spatial positions of n cells in 2D space. Dimension n * 2. Column names must be c("x", "y"). #' @param samples sample ID of each cell. In total, must have length equal to the number of cells. For example, if #' your dataset has 10000 cells, the first 5000 from sample 1 and the remaining 5000 from sample 2, you would write @@ -18,15 +18,13 @@ #' but can use non-integers. Values must be non-negative. #' @param slice_vals vector of integers as the values of parameter slices to use in WSIR. Suggest maximum value in the vector to #' be no more than around \eqn{\sqrt{n/20}}, as this upper bound ensures an average of at least 10 cells per tile in the training set. -#' @param varThreshold numeric proportion of variance in \code{t(X_H) \%*\% W \%*\% X_H} to retain. Must be between 0 and 1. Default is 0.95. -#' Select higher threshold to include more dimensions, lower threshold to include less dimensions. -#' @param maxDirections integer for the maximum number of directions to include in the low-dimensional embedding. Default is 50. -#' @param metric evaluation metric to use for parameter tuning to select optimal parameter combination. String, use "DC" to +#' @param metric character single value. Evaluation metric to use for parameter tuning to select optimal parameter combination. String, use "DC" to #' use distance correlation, "CD" to use correlation of distances, or "ncol" for the number of dimensions in the low-dimensional #' embedding. Default is "DC". #' @param nrep integer for the number of train/test splits of the data to perform. #' @param param parallel computing setup for bplapply from BiocParallel package. Default is to use a single core, hence #' default value is MulticoreParam(workers = 1) +#' @param ... arguments passed on to wSIROptimisation #' #' @return List with five slots, named "plot", "message", "best_alpha", "best_slices" and "results_dataframe". #' 1) "plot" shows the average metric value across the nrep iterations for every combination of parameters slices and alpha. @@ -43,7 +41,7 @@ #' #' @examples #' data(MouseData) -#' explore_params = exploreWSIRParams(exprs = sample1_exprs, +#' explore_params = exploreWSIRParams(X = sample1_exprs, #' coords = sample1_coords, #' alpha_vals = c(0,2,4,8), #' slice_vals = c(3,6,10)) @@ -58,27 +56,24 @@ #' slices = best_slices) #' #' @importFrom magrittr %>% -#' @importFrom ggplot2 ggplot -#' @importFrom ggplot2 aes -#' @importFrom ggplot2 geom_point -#' @importFrom ggplot2 theme_classic -#' @importFrom ggplot2 ggtitle +#' @importFrom ggplot2 ggplot aes geom_point theme_classic ggtitle #' @importFrom vctrs vec_rep_each -#' @importFrom BiocParallel bplapply -#' @importFrom BiocParallel MulticoreParam +#' @importFrom BiocParallel bplapply MulticoreParam #' @importFrom stringr word #' #' @export -exploreWSIRParams = function(exprs, +exploreWSIRParams = function(X, coords, samples = rep(1, nrow(coords)), alpha_vals = c(0,2,4,10), slice_vals = c(5,10,15,20), - varThreshold = 0.95, - maxDirections = 50, + # varThreshold = 0.95, + # maxDirections = 50, metric = "DC", nrep = 5, - param = MulticoreParam(workers = 1)) { + param = MulticoreParam(workers = 1), + ... + ) { # vector of all parameter combinations param_combinations = expand.grid(slice = slice_vals, alpha = alpha_vals, metric = NA) @@ -102,17 +97,17 @@ exploreWSIRParams = function(exprs, # Create pre-specified random splits of data, each columns corresponding to one split index_rep <- matrix( - sample(c(TRUE, FALSE), nrow(exprs)*nrep, replace = TRUE), - nrow = nrow(exprs), ncol = nrep + sample(c(TRUE, FALSE), nrow(X)*nrep, replace = TRUE), + nrow = nrow(X), ncol = nrep ) # create training and test set from each column index split_list <- apply(index_rep, 2, function(keep){ - exprs_train <- exprs[keep,] + X_train <- X[keep,] coords_train <- coords[keep,] samples_train <- samples[keep] - exprs_test <- exprs[!keep,] + X_test <- X[!keep,] coords_test <- coords[!keep,] - list(exprs_train, coords_train, exprs_test, + list(X_train, coords_train, X_test, coords_test, samples_train ) }) nElements = 5 @@ -121,14 +116,17 @@ exploreWSIRParams = function(exprs, for (ii in 1:nrow(param_combinations)){ a <- Sys.time() - cv_scores <- mapply(function(exprs_train, coords_train, exprs_test, + cv_scores <- mapply(function(X_train, coords_train, X_test, coords_test, samples_train){ - wSIROptimisation(as.matrix(exprs_train), - coords_train, as.matrix(exprs_test), + wSIROptimisation(as.matrix(X_train), + coords_train, as.matrix(X_test), coords_test, samples_train, param_combinations$slice[ii], param_combinations$alpha[ii], - varThreshold = varThreshold, - maxDirections = maxDirections, metric = "DC") + # varThreshold = varThreshold, + # maxDirections = maxDirections, + # metric = "DC", + evalmetrics = metric, + ...) }, result[[1]], result[[2]], result[[3]], result[[4]], result[[5]]) param_combinations$metric[ii] <- mean(cv_scores, na.rm = TRUE) @@ -142,7 +140,7 @@ exploreWSIRParams = function(exprs, message = paste0("Optimal (alpha, slices) pair: (", best_alpha, ", ", best_slices, ")") - plot <- ggplot(res_df, mapping = aes(x = alpha, y = slice, size = metric)) + + plot <- ggplot(res_df, mapping = aes(x = .data$alpha, y = .data$slice, size = .data$metric)) + geom_point() + theme_classic() + ggtitle(paste0("Metric value for different parameter combinations (",nrep, " iterations of train/test split)")) diff --git a/R/findTopGenes.R b/R/findTopGenes.R index 71a2705..a1d9875 100644 --- a/R/findTopGenes.R +++ b/R/findTopGenes.R @@ -19,10 +19,11 @@ #' @importFrom ggplot2 ggplot aes geom_col theme_minimal labs geom_hline ggtitle facet_wrap #' @importFrom stats reorder #' @importFrom vctrs vec_rep_each +#' @importFrom rlang .data #' #' @examples #' data(MouseData) -#' +#' #' wsir_obj = wSIR(X = sample1_exprs, #' coords = sample1_coords, #' optim_params = FALSE, @@ -53,13 +54,13 @@ findTopGenes <- function(WSIR, highest = 10, dirs = 1) { j = j + 1 } - loadings_plot <- ggplot(aes(x = reorder(gene, -loading, sum), y = loading), data = res_df) + - geom_col(width = 0.6) + - theme_minimal() + - labs(x = "Gene", y = "Loading values from WSIR directions") + - geom_hline(yintercept = 0) + - ggtitle(paste("Top", highest, "genes with highest/lowest loading in", paste(paste0("WSIR", dirs), collapse = ", "))) + - facet_wrap(~direction, nrow = 2, scales = "free") + loadings_plot <- ggplot2::ggplot(aes(x = stats::reorder(.data$gene, -.data$loading, sum), y = .data$loading), data = res_df) + + ggplot2::geom_col(width = 0.6) + + ggplot2::theme_minimal() + + ggplot2::labs(x = "Gene", y = "Loading values from WSIR directions") + + ggplot2::geom_hline(yintercept = 0) + + ggplot2::ggtitle(paste("Top", highest, "genes with highest/lowest loading in", paste(paste0("WSIR", dirs), collapse = ", "))) + + ggplot2::facet_wrap(~direction, nrow = 2, scales = "free") return(list(plot = loadings_plot, genes = res_df)) diff --git a/R/generateUmapFromWSIR.R b/R/generateUmapFromWSIR.R index 5eac6c6..d60acbc 100644 --- a/R/generateUmapFromWSIR.R +++ b/R/generateUmapFromWSIR.R @@ -24,7 +24,7 @@ #' umap_coords = generateUmapFromWSIR(WSIR = wsir_obj) #' top_genes_obj = findTopGenes(WSIR = wsir_obj, highest = 4) # create top genes object #' umap_plot = plotUmapFromWSIR(umap_coords = umap_coords, -#' exprs = sample1_exprs, +#' X = sample1_exprs, #' highest_genes = top_genes_obj, #' n_genes = 4) #' umap_plot diff --git a/R/plotUmapFromWSIR.R b/R/plotUmapFromWSIR.R index 3d67e7c..b979425 100644 --- a/R/plotUmapFromWSIR.R +++ b/R/plotUmapFromWSIR.R @@ -4,14 +4,14 @@ #' A function to plot a UMAP generated on the low-dimensional embedding of the gene expression data. The points are coloured by #' their value for the genes with highest (in absolute value) loading in a selected WSIR direction, by default WSIR1. #' -#' @param exprs matrix containing normalised gene expression data including n cells and p genes, dimension n * p. +#' @param X matrix containing normalised gene expression data including n cells and p genes, dimension n * p. #' @param umap_coords UMAP coordinates for each cell that is output of generateUmapFromWSIR function. The UMAP coordinates can be #' based on any dimension reduction method, e.g they could be the UMAP coordinates computed on the WSIR dimension reduction #' of the gene expression data, or on the PCs (principal components), or on any other low-dimensional matrix. Must be -#' a matrix of dimension nrow(exprs) * 2. +#' a matrix of dimension nrow(X) * 2. #' @param highest_genes output from findTopGenes function. Default is NULL so an error message can easily be thrown if genes #' and highest_genes are both not provided. -#' @param genes vector with gene names (must all be in colnames(exprs)) you wish to show in the UMAP plot. The cells +#' @param genes vector with gene names (must all be in colnames(X)) you wish to show in the UMAP plot. The cells #' in those plots will be coloured by their expression values for the genes you provide here. Must provide either genes #' or highest_genes parameter (not both): provide genes if you want to visualise a few specific genes, provide highest_genes #' if you want to visualise the genes that are found to be the most important to the WSIR directions. Default is NULL @@ -27,6 +27,7 @@ #' @importFrom vctrs vec_rep_each #' @importFrom ggplot2 facet_wrap ggplot aes geom_point #' @importFrom magrittr %>% +#' @importFrom rlang .data #' #' @examples #' data(MouseData) @@ -38,14 +39,14 @@ #' umap_coords = generateUmapFromWSIR(WSIR = wsir_obj) #' top_genes_obj = findTopGenes(WSIR = wsir_obj, highest = 4) # create top genes object #' umap_plot = plotUmapFromWSIR(umap_coords = umap_coords, -#' exprs = sample1_exprs, +#' X = sample1_exprs, #' highest_genes = top_genes_obj, #' n_genes = 4) #' umap_plot #' #' @export -plotUmapFromWSIR <- function(exprs, +plotUmapFromWSIR <- function(X, umap_coords, highest_genes = NULL, genes = NULL, @@ -62,19 +63,19 @@ plotUmapFromWSIR <- function(exprs, n_genes <- min(n_genes, length(genes)) # make sure it is a valid number of genes gene_names <- genes[1:n_genes] - gene_inds <- match(gene_names, colnames(exprs)) # identify the relevant columns in the gene expression matrix + gene_inds <- match(gene_names, colnames(X)) # identify the relevant columns in the gene expression matrix # create and fill umap_df - umap_df <- matrix(NA, nrow = n_genes*nrow(exprs), ncol = 4) %>% as.data.frame() + umap_df <- matrix(NA, nrow = n_genes*nrow(X), ncol = 4) %>% as.data.frame() colnames(umap_df) <- c("UMAP1", "UMAP2", "gene", "expression") umap_df$UMAP1 <- rep(umap_coords[,1], n_genes) umap_df$UMAP2 <- rep(umap_coords[,2], n_genes) - umap_df$gene <- vec_rep_each(gene_names, nrow(exprs)) - umap_df$expression <- as.matrix(exprs)[, gene_inds] %>% as.vector() + umap_df$gene <- vec_rep_each(gene_names, nrow(X)) + umap_df$expression <- as.matrix(X)[, gene_inds] %>% as.vector() # create plot - plot = ggplot(data = umap_df, aes(x = UMAP1, y = UMAP2, colour = expression)) + + plot = ggplot(data = umap_df, aes(x = .data$UMAP1, y = .data$UMAP2, colour = .data$expression)) + geom_point() + facet_wrap(~gene) + theme_classic() diff --git a/R/sirCategorical.R b/R/sirCategorical.R index 2c19492..7aa13d6 100644 --- a/R/sirCategorical.R +++ b/R/sirCategorical.R @@ -6,12 +6,10 @@ #' @param X matrix of normalised gene expression data for n genes across p cells. #' @param Y dataframe with 1 column named "coordinate" that is the tile allocation for each cell. There should #' be up to slices^2 unique tile IDs in this column. -#' @param directions Integer to specify maximum number of directions to retain in thee low-dimensional embedding +#' @param maxDirections Integer to specify maximum number of directions to retain in thee low-dimensional embedding #' of the data. Use if you need at most a certain number for a downstream task. #' @param W Weight matrix created by createWeightMatrix. Entry (i,j) represents the spatial correlation level #' between tiles i and j. The diagonal values should be all 1. If not provided, SIR implementation will be used. -#' @param varThreshold numeric value specifying the desired proportion of variance to retain. If e.g 95% (default), -#' then number of directions used will be such that their eigenvalues add up to 95% of the sum of all eigenvalues. #' #' @return list of outputs with 5 named slots. They are the same as the output of the wSIR function: this is #' the final step in the wSIR function. @@ -20,7 +18,7 @@ sirCategorical <- function(X, Y, - directions = 50, + maxDirections = 50, W = NULL, # varThreshold = 0.95 ... @@ -43,7 +41,7 @@ sirCategorical <- function(X, } pc_dirs <- sirPCA(sliced_data, - directions = directions, + # maxDirections = maxDirections, W = W, # varThreshold = varThreshold ... diff --git a/R/sirPCA.R b/R/sirPCA.R index 99afb35..dec9b13 100644 --- a/R/sirPCA.R +++ b/R/sirPCA.R @@ -1,24 +1,24 @@ #' sirPCA #' #' @description -#' This function performs eigendecomposition on (X^H)^t %*% W %*% (X^H), where X^H is matrix of scaled slice means. +#' This function performs eigendecomposition on `(X^H)^t %*% W %*% (X^H)`, where `X^H` is matrix of scaled slice means. #' #' @param sliced_data matrix of scaled slice means -#' @param directions integer, number of directions we want in our final low-dimensional Z. +#' @param maxDirections integer (default 50), number of directions we want in our final low-dimensional Z. #' @param W matrix of slice weights. Output of createWeightMatrix function. -#' @param varThreshold numeric proportion of eigenvalues of variance in t(X_H) %*% W %*% X_H to retain. -#' Default is 0.95. Select higher threshold to include more dimensions, lower threshold to include less dimensions. +#' @param varThreshold numeric proportion of eigenvalues of variance in `t(X_H) %*% W %*% X_H` to retain. +#' Default is 0.99. Select higher threshold to include more dimensions, lower threshold to include less dimensions. #' -#' @return list containing: 1) "eigenvectors" matrix of eigenvectors of (X^H)^t %*% W %*% (X^H) +#' @return list containing: 1) "eigenvectors" matrix of eigenvectors of `(X^H)^t %*% W %*% (X^H)` #' 2) "d" integer, selected number of dimensions -#' 3) "evalues" vector of eigenvalues of (X^H)^t %*% W %*% (X^H) +#' 3) "evalues" vector of eigenvalues of `(X^H)^t %*% W %*% (X^H)` #' #' @keywords internal sirPCA <- function(sliced_data, - directions, - W = diag(nrow(sliced_data)), - varThreshold = 0.95) { + maxDirections = 50, + W = diag(nrow(sliced_data)), + varThreshold = 0.99) { nslices <- nrow(sliced_data) sliced_data <- as.matrix(sliced_data) @@ -32,7 +32,7 @@ sirPCA <- function(sliced_data, propvariance_explained <- cumsum(eig_m_values)/sum(eig_m_values) d <- which(propvariance_explained >= varThreshold)[1] - d <- min(d, directions) # directions is a maximum number of directions + d <- min(d, maxDirections) return(list(evectors = all_pc[,1:d], d = d, evalues = eig_m$values)) diff --git a/R/visualiseWSIRDirections.R b/R/visualiseWSIRDirections.R index e83d4fa..a712863 100644 --- a/R/visualiseWSIRDirections.R +++ b/R/visualiseWSIRDirections.R @@ -18,14 +18,9 @@ #' coloured by their value for each of the first 'dirs' WSIR directions. #' #' @importFrom magrittr %>% -#' @importFrom ggplot2 ggplot -#' @importFrom ggplot2 aes -#' @importFrom ggplot2 geom_point -#' @importFrom ggplot2 theme_classic -#' @importFrom ggplot2 facet_wrap -#' @importFrom ggplot2 ggtitle -#' @importFrom ggplot2 scale_color_gradient +#' @importFrom ggplot2 ggplot aes geom_point theme_classic facet_wrap ggtitle scale_color_gradient #' @importFrom vctrs vec_rep_each +#' @importFrom rlang .data #' #' @examples #' data(MouseData) @@ -34,7 +29,8 @@ #' optim_params = FALSE, #' alpha = 4, #' slices = 6) # create wsir object -#' vis_obj = visualiseWSIRDirections(coords = sample1_coords, WSIR = wsir_obj, dirs = 8) # create visualisations +#' vis_obj = visualiseWSIRDirections(coords = sample1_coords, +#' WSIR = wsir_obj, dirs = 8) # create visualisations #' vis_obj #' #' @export @@ -53,11 +49,11 @@ visualiseWSIRDirections <- function(coords, WSIR, dirs = 6, mincol = "blue", max vis_df_long$WSIR_direction <- as.factor(vec_rep_each(c(1:dirs), nrow(coords))) # produce plot - plot <- ggplot(aes(x = x, y = y, color = value), data = vis_df_long) + - geom_point() + - theme_classic() + - facet_wrap(~WSIR_direction, scales = "fixed") + # one panel per WSIR direction - ggtitle("Cells at true positions coloured by WSIR values") + - scale_color_gradient(low = mincol, high = maxcol) + plot <- ggplot2::ggplot(aes(x = .data$x, y = .data$y, color = .data$value), data = vis_df_long) + + ggplot2::geom_point() + + ggplot2::theme_classic() + + ggplot2::facet_wrap(~WSIR_direction, scales = "fixed") + # one panel per WSIR direction + ggplot2::ggtitle("Cells at true positions coloured by WSIR values") + + ggplot2::scale_color_gradient(low = mincol, high = maxcol) return(plot) } diff --git a/R/wSIR.R b/R/wSIR.R index 7f46fa2..1068bbd 100644 --- a/R/wSIR.R +++ b/R/wSIR.R @@ -7,42 +7,31 @@ #' #' @param X matrix containing normalised gene expression data including n cells and p genes, dimension n * p. #' @param coords dataframe containing spatial positions of n cells in 2D space. Dimension n * 2. Column names must be c("x", "y"). -#' @param samples sample ID of each cell. In total, must have length equal to the number of cells. For example, if -#' your dataset has 10000 cells, the first 5000 from sample 1 and the remaining 5000 from sample 2, you would write -#' samples = c(rep(1, 5000), rep(2, 5000)) to specify that the first 5000 cells are sample 1 and the remaining are sample 2. -#' Default is that all cells are from sample 1. Sample IDs can be of any format: for the previous example, you could write -#' samples = c(rep("sample 1", 5000), rep("sample 2", 5000)), and the result would be the same. -#' @param slices integer for number of slices on each axis of tissue. For example, slices = 4 creates 4 slices along -#' each spatial axis, yielding 4^2 = 16 tiles. Default is 8, suggested minimum of 3. Suggest to tune this parameter. -#' @param alpha integer to specify desired strength of spatial correlation. Suggest to tune this parameter on testing -#' dataset among e.g values c(0,2,4,8). alpha = 0 gives SIR implementation. Larger values give stronger spatial correlations. -#' @param maxDirections integer for upper limit on number of directions to include in low-dimensional space. Use if you -#' need less than a certain number of dimensions for downstream analyes -#' @param varThreshold numeric proportion of eigenvalues of variance in t(X_H) %*% W %*% X_H to retain. Default is 0.95. -#' Select higher threshold to include more dimensions, lower threshold to include less dimensions. -#' @param optim_params logical, if you would like wSIR to automatically optimise parameters slices and alpha based on -#' either distance correlation or correlation of distances as evaluation metric. Default is TRUE. If your downstream +#' @param optim_params logical default FALSE. If you would like wSIR to automatically optimise parameters slices and alpha based on +#' either distance correlation or correlation of distances as evaluation metric. If your downstream #' task is quite different to either of those metrics, then we suggest you tune those two parameters yourself using #' your specific task and evaluation metric. In that case, determine your optimal slices and alpha values and then use #' them in the relevant function, then setting optim_params = FALSE. -#' @param alpha_vals If you have optim_params = TRUE, then this is the values of alpha to optimise over in WSIR. 0 +#' @param alpha_vals If you have optim_params = TRUE, then this is the values of alpha to optimise over in wSIR. 0 #' gives Sliced Inverse Regression (SIR) implementation, and larger values represent stronger spatial correlation. #' Suggest to use integers for interpretability, but can use non-integers. Values must be non-negative. -#' @param slice_vals If you have optim_params = TRUE, then this is the values of slices to optimise over in WSIR. +#' @param slice_vals If you have optim_params = TRUE, then this is the values of slices to optimise over in wSIR. #' Suggest maximum value in the vector to be no more than around \eqn{\sqrt{n/20}}, as this upper bound ensures an #' average of at least 10 cells per tile in the training set. #' @param metric If optim_params = TRUE, this is the evaluation metric to use for parameter tuning. String, #' either "DC" to use distance correlation or "CD" to use correlation of distances. Default is "DC". #' @param nrep If optim_params = TRUE, this is the integer for the number of train/test splits of the data to #' perform during optimisation of parameters slices and alpha. +#' @param verbose logical (default FALSE) whether progress messages should be printed +#' @param ... arguments passing to `exploreWSIRParams` and `wSIRSpecifiedParams` #' #' @return wSIR object which is a list containing 5 (named) positions. 1) scores matrix containing low-dimensional -#' representation of X from wSIR algorithm. Dimension n * d, where d is chosen to include at least varThreshold proportion of variance. -#' 2) directions matrix containing WSIR directions, dimension p * d. Use this to project new data into low-dimensional space via X_new %*% directions. +#' representation of X from wSIR algorithm. Dimension `n * d`, where d is chosen to include at least varThreshold proportion of variance. +#' 2) directions matrix containing wSIR directions, dimension `p * d`. Use this to project new data into low-dimensional space via X_new %*% directions. #' 3) estd integer stating how many dimensions in the computed low-dimensional space are needed to account for varThreshold #' proportion of variance. Same as number of dimensions in scores. #' 4) W matrix weight matrix from cells_weight_matrix2 function -#' 5) evalues vector containing p eigenvalues of t(X_H) %*% W %*% X_H. varThreshold parameter works on these evalues, +#' 5) evalues vector containing p eigenvalues of `t(X_H) %*% W %*% X_H`. varThreshold parameter works on these evalues, #' such that e.g the first j directions are included if the sum of the first j evalues equals 0.95% of the sum of all evalues. #' #' @useDynLib wSIR @@ -55,47 +44,53 @@ #' optim_params = TRUE, #' alpha_vals = c(0,2,4), #' slice_vals = c(3,6,10), -#' metric = "CD", +#' metric = "DC", #' nrep = 1) # create wsir object #' #' @export wSIR = function(X, coords, - samples = rep(1, nrow(coords)), - slices = 8, - alpha = 4, - maxDirections = 50, + # samples = rep(1, nrow(coords)), + # slices = 8, + # alpha = 4, + # maxDirections = 50, # varThreshold = 0.95, - optim_params = TRUE, + optim_params = FALSE, alpha_vals = c(0,1,2,4,8,12), slice_vals = c(3,5,7,10,15,20), metric = "DC", nrep = 5, + verbose = FALSE, ...) { if (optim_params) { - optim_obj = exploreWSIRParams(exprs = X, + if (verbose) message("Optimising parameters...") + optim_obj = exploreWSIRParams(X = X, coords = coords, - samples = samples, + # samples = samples, + # varThreshold = varThreshold, + # maxDirections = maxDirections, alpha_vals = alpha_vals, slice_vals = slice_vals, - # varThreshold = varThreshold, - maxDirections = maxDirections, metric = metric, nrep = nrep, ...) alpha = optim_obj$best_alpha slices = optim_obj$best_slices + if (verbose) message("Optimising parameters... complete!") } + if (verbose) message("Fitting wSIR model...") wsir_obj <- wSIRSpecifiedParams(X = X, coords = coords, - samples = samples, - alpha = alpha, - slices = slices, - maxDirections = maxDirections, + # samples = samples, + # alpha = alpha, + # slices = slices, + # maxDirections = maxDirections, # varThreshold = varThreshold ... ) + if (verbose) message("Fitting wSIR model... complete!") + return(wsir_obj) } diff --git a/R/wSIROptimisation.R b/R/wSIROptimisation.R index 819bd4e..e14a6c0 100644 --- a/R/wSIROptimisation.R +++ b/R/wSIROptimisation.R @@ -18,19 +18,16 @@ #' Default value is 4, at which weight matrix value for a pair of tiles is inversely proportional to the physical distance between them. Suggest to tune this #' parameter using exploreWSIRParams() function. #' @param maxDirections integer for the maximum number of directions to include in the low-dimenensional embedding. Default is 50. -#' @param varThreshold numeric proportion of variance in \code{t(X_H) \%*\% W \%*\% X_H} to retain. Must be between 0 and 1. Default is 0.95. -#' Select higher threshold to include more dimensions, lower threshold to include less dimensions. -#' @param metrics evaluation metrics to use for parameter tuning. String, options are any or all of: "DC" to use distance +#' @param evalmetrics evaluation metrics to use for parameter tuning. String, options are any or all of: "DC" to use distance #' correlation; "CD" to use correlation of distances; "ncol" to use number of columns in low-dimensional embedding. Default is all three, #' specified by metrics = c("DC", "CD", "ncol"). +#' @param ... arguments passed to wSIRSpecifiedParams #' #' @return Average metric value for the selected metric(s) over each train/test split. #' -#' #' @importFrom stats cor #' @importFrom distances distances #' -#' #' @keywords internal wSIROptimisation = function(exprs_train, @@ -41,8 +38,9 @@ wSIROptimisation = function(exprs_train, slices, alpha, maxDirections, - varThreshold, - metrics = c("CD","DC","ncol")) { + # varThreshold, + evalmetrics = c("CD","DC","ncol"), + ...) { results = NULL wsir_obj = wSIRSpecifiedParams(X = exprs_train, @@ -51,10 +49,11 @@ wSIROptimisation = function(exprs_train, slices = slices, alpha = alpha, maxDirections = maxDirections, - varThreshold = varThreshold) + # varThreshold = varThreshold, + ...) projected_test = projectWSIR(wsir = wsir_obj, newdata = exprs_test) - if ("CD" %in% metrics) { + if ("CD" %in% evalmetrics) { # Replace dist by distances d1 <- as.matrix(distances::distances(projected_test)) d2 <- as.matrix(distances::distances(coords_test)) @@ -63,12 +62,12 @@ wSIROptimisation = function(exprs_train, current_cd <- .spearman_correlation(k1, k2) results <- c(results, cd = current_cd) } - if ("DC" %in% metrics) { + if ("DC" %in% evalmetrics) { current_dc <- Rfast::dcor(as.matrix(projected_test), as.matrix(coords_test))$dcor results <- c(results, dc = current_dc) } - if ("ncol" %in% metrics) { + if ("ncol" %in% evalmetrics) { current_ncol <- ncol(projected_test) ncol_vals <- current_ncol results <- c(results, ncol = ncol_vals) diff --git a/R/wSIRSpecifiedParams.R b/R/wSIRSpecifiedParams.R index b828775..3613892 100644 --- a/R/wSIRSpecifiedParams.R +++ b/R/wSIRSpecifiedParams.R @@ -16,10 +16,7 @@ #' each spatial axis, yielding 4^2 = 16 tiles. Default is 8, suggested minimum of 3. Suggest to tune this parameter. #' @param alpha integer to specify desired strength of spatial correlation. Suggest to tune this parameter on testing #' dataset among e.g values c(0,2,4,8). alpha = 0 gives SIR implementation. Larger values give stronger spatial correlations. -#' @param maxDirections integer for upper limit on number of directions to include in low-dimensional space. Use if you -#' need less than a certain number of dimensions for downstream analyes -#' @param varThreshold numeric proportion of eigenvalues of variance in t(X_H) %*% W %*% X_H to retain. Default is 0.95. -#' Select higher threshold to include more dimensions, lower threshold to include less dimensions. +#' @param ... arguments passed to sirCategorical #' #' @return wSIR object which is a list containing 5 (named) positions. 1) scores matrix containing low-dimensional #' representation of X from wSIR algorithm. Dimension n * d, where d is chosen to include at least varThreshold proportion of variance. @@ -30,6 +27,7 @@ #' 5) evalues vector containing p eigenvalues of t(X_H) %*% W %*% X_H. varThreshold parameter works on these evalues, #' such that e.g the first j directions are included if the sum of the first j evalues equals 0.95% of the sum of all evalues. #' +#' @importFrom methods is #' @keywords internal wSIRSpecifiedParams = function(X, coords, @@ -37,14 +35,14 @@ wSIRSpecifiedParams = function(X, samples = rep(1, nrow(coords)), slices = 8, alpha = 4, - maxDirections = 50, + # maxDirections = 50, # varThreshold = 0.95 ... ) { if (!is.null(group)) { - if (class(group) != "factor") { + if (methods::is(group, "factor")) { message("group is not a factor, setting as.factor") group <- factor(group) } @@ -80,8 +78,8 @@ wSIRSpecifiedParams = function(X, wsir_obj <- sirCategorical(X = X, Y = tile_allocation, - directions = maxDirections, W = W, + # maxDirections = maxDirections, # varThreshold = varThreshold ... ) diff --git a/man/exploreWSIRParams.Rd b/man/exploreWSIRParams.Rd index 69c64cd..e16e0d1 100644 --- a/man/exploreWSIRParams.Rd +++ b/man/exploreWSIRParams.Rd @@ -5,20 +5,19 @@ \title{exploreWSIRParams function} \usage{ exploreWSIRParams( - exprs, + X, coords, samples = rep(1, nrow(coords)), alpha_vals = c(0, 2, 4, 10), slice_vals = c(5, 10, 15, 20), - varThreshold = 0.95, - maxDirections = 50, metric = "DC", nrep = 5, - param = MulticoreParam(workers = 1) + param = MulticoreParam(workers = 1), + ... ) } \arguments{ -\item{exprs}{matrix containing normalised gene expression data including n cells and p genes, dimension n * p.} +\item{X}{matrix containing normalised gene expression data including n cells and p genes, dimension n * p.} \item{coords}{dataframe containing spatial positions of n cells in 2D space. Dimension n * 2. Column names must be c("x", "y").} @@ -35,12 +34,7 @@ but can use non-integers. Values must be non-negative.} \item{slice_vals}{vector of integers as the values of parameter slices to use in WSIR. Suggest maximum value in the vector to be no more than around \eqn{\sqrt{n/20}}, as this upper bound ensures an average of at least 10 cells per tile in the training set.} -\item{varThreshold}{numeric proportion of variance in \code{t(X_H) \%*\% W \%*\% X_H} to retain. Must be between 0 and 1. Default is 0.95. -Select higher threshold to include more dimensions, lower threshold to include less dimensions.} - -\item{maxDirections}{integer for the maximum number of directions to include in the low-dimensional embedding. Default is 50.} - -\item{metric}{evaluation metric to use for parameter tuning to select optimal parameter combination. String, use "DC" to +\item{metric}{character single value. Evaluation metric to use for parameter tuning to select optimal parameter combination. String, use "DC" to use distance correlation, "CD" to use correlation of distances, or "ncol" for the number of dimensions in the low-dimensional embedding. Default is "DC".} @@ -48,6 +42,8 @@ embedding. Default is "DC".} \item{param}{parallel computing setup for bplapply from BiocParallel package. Default is to use a single core, hence default value is MulticoreParam(workers = 1)} + +\item{...}{arguments passed on to wSIROptimisation} } \value{ List with five slots, named "plot", "message", "best_alpha", "best_slices" and "results_dataframe". @@ -73,7 +69,7 @@ This pair of slices and alpha can be used for your downstream tasks. } \examples{ data(MouseData) -explore_params = exploreWSIRParams(exprs = sample1_exprs, +explore_params = exploreWSIRParams(X = sample1_exprs, coords = sample1_coords, alpha_vals = c(0,2,4,8), slice_vals = c(3,6,10)) diff --git a/man/generateUmapFromWSIR.Rd b/man/generateUmapFromWSIR.Rd index 16b3239..29803b0 100644 --- a/man/generateUmapFromWSIR.Rd +++ b/man/generateUmapFromWSIR.Rd @@ -30,7 +30,7 @@ wsir_obj = wSIR(X = sample1_exprs, umap_coords = generateUmapFromWSIR(WSIR = wsir_obj) top_genes_obj = findTopGenes(WSIR = wsir_obj, highest = 4) # create top genes object umap_plot = plotUmapFromWSIR(umap_coords = umap_coords, - exprs = sample1_exprs, + X = sample1_exprs, highest_genes = top_genes_obj, n_genes = 4) umap_plot diff --git a/man/plotUmapFromWSIR.Rd b/man/plotUmapFromWSIR.Rd index 7353b33..9211e90 100644 --- a/man/plotUmapFromWSIR.Rd +++ b/man/plotUmapFromWSIR.Rd @@ -5,7 +5,7 @@ \title{plotUmapFromWSIR} \usage{ plotUmapFromWSIR( - exprs, + X, umap_coords, highest_genes = NULL, genes = NULL, @@ -14,17 +14,17 @@ plotUmapFromWSIR( ) } \arguments{ -\item{exprs}{matrix containing normalised gene expression data including n cells and p genes, dimension n * p.} +\item{X}{matrix containing normalised gene expression data including n cells and p genes, dimension n * p.} \item{umap_coords}{UMAP coordinates for each cell that is output of generateUmapFromWSIR function. The UMAP coordinates can be based on any dimension reduction method, e.g they could be the UMAP coordinates computed on the WSIR dimension reduction of the gene expression data, or on the PCs (principal components), or on any other low-dimensional matrix. Must be -a matrix of dimension nrow(exprs) * 2.} +a matrix of dimension nrow(X) * 2.} \item{highest_genes}{output from findTopGenes function. Default is NULL so an error message can easily be thrown if genes and highest_genes are both not provided.} -\item{genes}{vector with gene names (must all be in colnames(exprs)) you wish to show in the UMAP plot. The cells +\item{genes}{vector with gene names (must all be in colnames(X)) you wish to show in the UMAP plot. The cells in those plots will be coloured by their expression values for the genes you provide here. Must provide either genes or highest_genes parameter (not both): provide genes if you want to visualise a few specific genes, provide highest_genes if you want to visualise the genes that are found to be the most important to the WSIR directions. Default is NULL @@ -54,7 +54,7 @@ wsir_obj = wSIR(X = sample1_exprs, umap_coords = generateUmapFromWSIR(WSIR = wsir_obj) top_genes_obj = findTopGenes(WSIR = wsir_obj, highest = 4) # create top genes object umap_plot = plotUmapFromWSIR(umap_coords = umap_coords, - exprs = sample1_exprs, + X = sample1_exprs, highest_genes = top_genes_obj, n_genes = 4) umap_plot diff --git a/man/sirCategorical.Rd b/man/sirCategorical.Rd index d15f5c7..151e19a 100644 --- a/man/sirCategorical.Rd +++ b/man/sirCategorical.Rd @@ -4,7 +4,7 @@ \alias{sirCategorical} \title{sirCategorical} \usage{ -sirCategorical(X, Y, directions = 50, W = NULL, varThreshold = 0.95) +sirCategorical(X, Y, maxDirections = 50, W = NULL, ...) } \arguments{ \item{X}{matrix of normalised gene expression data for n genes across p cells.} @@ -12,14 +12,11 @@ sirCategorical(X, Y, directions = 50, W = NULL, varThreshold = 0.95) \item{Y}{dataframe with 1 column named "coordinate" that is the tile allocation for each cell. There should be up to slices^2 unique tile IDs in this column.} -\item{directions}{Integer to specify maximum number of directions to retain in thee low-dimensional embedding +\item{maxDirections}{Integer to specify maximum number of directions to retain in thee low-dimensional embedding of the data. Use if you need at most a certain number for a downstream task.} \item{W}{Weight matrix created by createWeightMatrix. Entry (i,j) represents the spatial correlation level between tiles i and j. The diagonal values should be all 1. If not provided, SIR implementation will be used.} - -\item{varThreshold}{numeric value specifying the desired proportion of variance to retain. If e.g 95\% (default), -then number of directions used will be such that their eigenvalues add up to 95\% of the sum of all eigenvalues.} } \value{ list of outputs with 5 named slots. They are the same as the output of the wSIR function: this is diff --git a/man/sirPCA.Rd b/man/sirPCA.Rd index 7ce7d9d..af9ecf3 100644 --- a/man/sirPCA.Rd +++ b/man/sirPCA.Rd @@ -6,27 +6,27 @@ \usage{ sirPCA( sliced_data, - directions, + maxDirections = 50, W = diag(nrow(sliced_data)), - varThreshold = 0.95 + varThreshold = 0.99 ) } \arguments{ \item{sliced_data}{matrix of scaled slice means} -\item{directions}{integer, number of directions we want in our final low-dimensional Z.} +\item{maxDirections}{integer (default 50), number of directions we want in our final low-dimensional Z.} \item{W}{matrix of slice weights. Output of createWeightMatrix function.} -\item{varThreshold}{numeric proportion of eigenvalues of variance in t(X_H) \%\emph{\% W \%}\% X_H to retain. -Default is 0.95. Select higher threshold to include more dimensions, lower threshold to include less dimensions.} +\item{varThreshold}{numeric proportion of eigenvalues of variance in \code{t(X_H) \%*\% W \%*\% X_H} to retain. +Default is 0.99. Select higher threshold to include more dimensions, lower threshold to include less dimensions.} } \value{ -list containing: 1) "eigenvectors" matrix of eigenvectors of (X^H)^t \%\emph{\% W \%}\% (X^H) +list containing: 1) "eigenvectors" matrix of eigenvectors of \code{(X^H)^t \%*\% W \%*\% (X^H)} 2) "d" integer, selected number of dimensions -3) "evalues" vector of eigenvalues of (X^H)^t \%\emph{\% W \%}\% (X^H) +3) "evalues" vector of eigenvalues of \code{(X^H)^t \%*\% W \%*\% (X^H)} } \description{ -This function performs eigendecomposition on (X^H)^t \%\emph{\% W \%}\% (X^H), where X^H is matrix of scaled slice means. +This function performs eigendecomposition on \code{(X^H)^t \%*\% W \%*\% (X^H)}, where \code{X^H} is matrix of scaled slice means. } \keyword{internal} diff --git a/man/visualiseWSIRDirections.Rd b/man/visualiseWSIRDirections.Rd index b8f89c7..edcfd08 100644 --- a/man/visualiseWSIRDirections.Rd +++ b/man/visualiseWSIRDirections.Rd @@ -42,7 +42,8 @@ wsir_obj = wSIR(X = sample1_exprs, optim_params = FALSE, alpha = 4, slices = 6) # create wsir object -vis_obj = visualiseWSIRDirections(coords = sample1_coords, WSIR = wsir_obj, dirs = 8) # create visualisations +vis_obj = visualiseWSIRDirections(coords = sample1_coords, +WSIR = wsir_obj, dirs = 8) # create visualisations vis_obj } diff --git a/man/wSIR.Rd b/man/wSIR.Rd index 269a97a..66f404d 100644 --- a/man/wSIR.Rd +++ b/man/wSIR.Rd @@ -7,16 +7,13 @@ wSIR( X, coords, - samples = rep(1, nrow(coords)), - slices = 8, - alpha = 4, - maxDirections = 50, - varThreshold = 0.95, - optim_params = TRUE, + optim_params = FALSE, alpha_vals = c(0, 1, 2, 4, 8, 12), slice_vals = c(3, 5, 7, 10, 15, 20), metric = "DC", - nrep = 5 + nrep = 5, + verbose = FALSE, + ... ) } \arguments{ @@ -24,35 +21,17 @@ wSIR( \item{coords}{dataframe containing spatial positions of n cells in 2D space. Dimension n * 2. Column names must be c("x", "y").} -\item{samples}{sample ID of each cell. In total, must have length equal to the number of cells. For example, if -your dataset has 10000 cells, the first 5000 from sample 1 and the remaining 5000 from sample 2, you would write -samples = c(rep(1, 5000), rep(2, 5000)) to specify that the first 5000 cells are sample 1 and the remaining are sample 2. -Default is that all cells are from sample 1. Sample IDs can be of any format: for the previous example, you could write -samples = c(rep("sample 1", 5000), rep("sample 2", 5000)), and the result would be the same.} - -\item{slices}{integer for number of slices on each axis of tissue. For example, slices = 4 creates 4 slices along -each spatial axis, yielding 4^2 = 16 tiles. Default is 8, suggested minimum of 3. Suggest to tune this parameter.} - -\item{alpha}{integer to specify desired strength of spatial correlation. Suggest to tune this parameter on testing -dataset among e.g values c(0,2,4,8). alpha = 0 gives SIR implementation. Larger values give stronger spatial correlations.} - -\item{maxDirections}{integer for upper limit on number of directions to include in low-dimensional space. Use if you -need less than a certain number of dimensions for downstream analyes} - -\item{varThreshold}{numeric proportion of eigenvalues of variance in t(X_H) \%\emph{\% W \%}\% X_H to retain. Default is 0.95. -Select higher threshold to include more dimensions, lower threshold to include less dimensions.} - -\item{optim_params}{logical, if you would like wSIR to automatically optimise parameters slices and alpha based on -either distance correlation or correlation of distances as evaluation metric. Default is TRUE. If your downstream +\item{optim_params}{logical default FALSE. If you would like wSIR to automatically optimise parameters slices and alpha based on +either distance correlation or correlation of distances as evaluation metric. If your downstream task is quite different to either of those metrics, then we suggest you tune those two parameters yourself using your specific task and evaluation metric. In that case, determine your optimal slices and alpha values and then use them in the relevant function, then setting optim_params = FALSE.} -\item{alpha_vals}{If you have optim_params = TRUE, then this is the values of alpha to optimise over in WSIR. 0 +\item{alpha_vals}{If you have optim_params = TRUE, then this is the values of alpha to optimise over in wSIR. 0 gives Sliced Inverse Regression (SIR) implementation, and larger values represent stronger spatial correlation. Suggest to use integers for interpretability, but can use non-integers. Values must be non-negative.} -\item{slice_vals}{If you have optim_params = TRUE, then this is the values of slices to optimise over in WSIR. +\item{slice_vals}{If you have optim_params = TRUE, then this is the values of slices to optimise over in wSIR. Suggest maximum value in the vector to be no more than around \eqn{\sqrt{n/20}}, as this upper bound ensures an average of at least 10 cells per tile in the training set.} @@ -61,15 +40,19 @@ either "DC" to use distance correlation or "CD" to use correlation of distances. \item{nrep}{If optim_params = TRUE, this is the integer for the number of train/test splits of the data to perform during optimisation of parameters slices and alpha.} + +\item{verbose}{logical (default FALSE) whether progress messages should be printed} + +\item{...}{arguments passing to \code{exploreWSIRParams} and \code{wSIRSpecifiedParams}} } \value{ wSIR object which is a list containing 5 (named) positions. 1) scores matrix containing low-dimensional -representation of X from wSIR algorithm. Dimension n * d, where d is chosen to include at least varThreshold proportion of variance. -2) directions matrix containing WSIR directions, dimension p * d. Use this to project new data into low-dimensional space via X_new \%\emph{\% directions. +representation of X from wSIR algorithm. Dimension \code{n * d}, where d is chosen to include at least varThreshold proportion of variance. +2) directions matrix containing wSIR directions, dimension \code{p * d}. Use this to project new data into low-dimensional space via X_new \%*\% directions. 3) estd integer stating how many dimensions in the computed low-dimensional space are needed to account for varThreshold proportion of variance. Same as number of dimensions in scores. 4) W matrix weight matrix from cells_weight_matrix2 function -5) evalues vector containing p eigenvalues of t(X_H) \%}\% W \%*\% X_H. varThreshold parameter works on these evalues, +5) evalues vector containing p eigenvalues of \code{t(X_H) \%*\% W \%*\% X_H}. varThreshold parameter works on these evalues, such that e.g the first j directions are included if the sum of the first j evalues equals 0.95\% of the sum of all evalues. } \description{ @@ -84,7 +67,7 @@ wsir_obj = wSIR(X = sample1_exprs, optim_params = TRUE, alpha_vals = c(0,2,4), slice_vals = c(3,6,10), - metric = "CD", + metric = "DC", nrep = 1) # create wsir object } diff --git a/man/wSIROptimisation.Rd b/man/wSIROptimisation.Rd index 9ad1620..2cce788 100644 --- a/man/wSIROptimisation.Rd +++ b/man/wSIROptimisation.Rd @@ -13,8 +13,8 @@ wSIROptimisation( slices, alpha, maxDirections, - varThreshold, - metrics = c("CD", "DC", "ncol") + evalmetrics = c("CD", "DC", "ncol"), + ... ) } \arguments{ @@ -39,12 +39,11 @@ parameter using exploreWSIRParams() function.} \item{maxDirections}{integer for the maximum number of directions to include in the low-dimenensional embedding. Default is 50.} -\item{varThreshold}{numeric proportion of variance in \code{t(X_H) \%*\% W \%*\% X_H} to retain. Must be between 0 and 1. Default is 0.95. -Select higher threshold to include more dimensions, lower threshold to include less dimensions.} - -\item{metrics}{evaluation metrics to use for parameter tuning. String, options are any or all of: "DC" to use distance +\item{evalmetrics}{evaluation metrics to use for parameter tuning. String, options are any or all of: "DC" to use distance correlation; "CD" to use correlation of distances; "ncol" to use number of columns in low-dimensional embedding. Default is all three, specified by metrics = c("DC", "CD", "ncol").} + +\item{...}{arguments passed to wSIRSpecifiedParams} } \value{ Average metric value for the selected metric(s) over each train/test split. diff --git a/man/wSIRSpecifiedParams.Rd b/man/wSIRSpecifiedParams.Rd index e036e73..608f3ea 100644 --- a/man/wSIRSpecifiedParams.Rd +++ b/man/wSIRSpecifiedParams.Rd @@ -7,11 +7,11 @@ wSIRSpecifiedParams( X, coords, + group = NULL, samples = rep(1, nrow(coords)), slices = 8, alpha = 4, - maxDirections = 50, - varThreshold = 0.95 + ... ) } \arguments{ @@ -19,6 +19,8 @@ wSIRSpecifiedParams( \item{coords}{dataframe containing spatial positions of n cells in 2D space. Dimension n * 2. Column names must be c("x", "y").} +\item{group}{a factor indicating group level information for cells within and across samples (e.g. cell type).} + \item{samples}{sample ID of each cell. In total, must have length equal to the number of cells. For example, if your dataset has 10000 cells, the first 5000 from sample 1 and the remaining 5000 from sample 2, you would write samples = c(rep(1, 5000), rep(2, 5000)) to specify that the first 5000 cells are sample 1 and the remaining are sample 2. @@ -31,11 +33,7 @@ each spatial axis, yielding 4^2 = 16 tiles. Default is 8, suggested minimum of 3 \item{alpha}{integer to specify desired strength of spatial correlation. Suggest to tune this parameter on testing dataset among e.g values c(0,2,4,8). alpha = 0 gives SIR implementation. Larger values give stronger spatial correlations.} -\item{maxDirections}{integer for upper limit on number of directions to include in low-dimensional space. Use if you -need less than a certain number of dimensions for downstream analyes} - -\item{varThreshold}{numeric proportion of eigenvalues of variance in t(X_H) \%\emph{\% W \%}\% X_H to retain. Default is 0.95. -Select higher threshold to include more dimensions, lower threshold to include less dimensions.} +\item{...}{arguments passed to sirCategorical} } \value{ wSIR object which is a list containing 5 (named) positions. 1) scores matrix containing low-dimensional diff --git a/src/wSIR.so b/src/wSIR.so index a33c0e2abf08661e251bb0e914d30a715a4e164a..9f9b338593f947eb91ae476712dc4cb5cfe611ef 100755 GIT binary patch delta 174 zcmV;f08#&lnhS`U3$S1Y5V90&Y1wo)MWg+>hFvq55rc3Bhj0b~w{Qjm2rdIHlIyp7 zGy?q`122;6w|h|n>2)9?%|8G2ICY&m&oEqZgv@t7$#G0sLAZd#U1|uYtBKOLgvSCv z1R%^^E;LaK^7LTSUd@lW7G+ev#tHhZv^`xCLG=9ne5jY?%K{)ErU|ME-?UJ{kj75V capg4T&n{2m7kt!Fv&d^Lz;A*=mn6&rA_%pZ>w|Cxhj0b~w{Qjm2rdKpFzL5@ zGy?q`1N$)Pw|h|n>2)CAUZ$DX{+Y7+RK^phUsS$H;O@sOqUldR+4?Aq81Fu}gvSCv z1R(0bi?E=|A{idrWf4NVt!Ql1vb(F)KrEfMXbmDME%cY<%K{)E^HtPzWUi7_L6&i# cd|19vn6*Ym!;<9lS(FajBgK5tmn6&r - - - - - - - - - - - - - - - - - -Weighted Sliced Inverse Regression (WSIR) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-
-
-
-
- -
- - - - - - - -
library(wSIR) # package itself
-library(magrittr) # for %>% 
-library(ggplot2) # for ggplot
-library(Rfast) # for distance correlation
-library(doBy) # for which.maxn
-library(vctrs) # for vec_rep_each
-library(umap) # for umap
-library(class) # for example wSIR application
-
-

1 Introduction

-

Weighted Sliced Inverse Regression (wSIR) is a supervised dimension reduction -technique for spatial transcriptomics data. This method uses each cell’s gene -expression values as covariates and spatial position as the response. This -allows us to create a low-dimensional representation of the gene expression -data that retains the information about spatial patterns that is present in -the gene expression data. The resulting mapping from gene expression data to -a spatially aware low-dimensional embedding can be used to project new -single-cell gene expression data into a low-dimensional space which preserves -the ability to predict each cell’s spatial location from its low-dimensional -embedding.

-
-

1.1 Method Overview

-

wSIR is an extension of the supervised dimension reduction technique of Sliced -Inverse Regression (SIR).

-

SIR is an existing supervised dimension reduction method which works by -grouping together the observations with similar values for the response. For -spatial transcriptomics data, this means grouping all the cells into a certain -number of tiles based on their spatial position. For example, if we use 4 -tiles, then the cells in the top right quadrant of the image/tissue go to one -group, those in the top left to another, and so on. Each of those groups is -then summarised by averaging the expression level of all cells in each group -for each of the genes. From there, eigendecomposition is performed on the -resulting matrix of tile-gene means, then returning the SIR directions and -SIR scores.

-

The motivation behind wSIR is that SIR only uses each cell’s spatial position -when we are separating the cells into the given number of groups/tiles. Once -those groups are created, we lose the fact that some groups may be more -spatially related (if they come from adjacent tiles) than other groups (if -they come from opposite sides of the tissue). wSIR uses a weight matrix to -incorporate the spatial correlation between all pairs of cells in the SIR -algorithm. This matrix has dimension H*H, where H is the number of tiles, -and the (i,j)th entry represents the distance between tiles i and j. This -matrix is incorporated into the eigendeomposition step. The wSIR output has the -same structure as the SIR output.

-
-
-

1.2 Vignette Overview

-

In this vignette, we will demonstrate how to use WSIR to obtain a -low-dimensional embedding of gene expression data. We will then explore this -embedding using the package’s built-in functions. However, this low-dimensional -matrix is more importantly used for your own downstream tasks that would -benefit from a lower-dimensional representation of gene expression data that -preserves information about each cell’s spatial location. We perform some basic -downstream analysis to demonstrate the practicality of wSIR.

-
-
-
-

2 Data

-

We use data from Lohoff et al, 2021. -The code below would allow you to download the mouse data yourself using -the MouseGastrulationData and scran R packages. We randomly sample 20% of the -cells from each of the three biological replicate samples.

-
# Packages needed to download data
-#library(scran) # for logNormCounts
-#library(MouseGastrulationData) # to download the data for this vignette
-
-set.seed(2024)
-
-seqfish_data_sample1 <- LohoffSeqFISHData(samples = c(1,2))
-seqfish_data_sample1 = logNormCounts(seqfish_data_sample1) # log transform variance stabilising
-rownames(seqfish_data_sample1) <- rowData(seqfish_data_sample1)[,"SYMBOL"] # change rownames to gene symbols that are consistent
-sample1_exprs = t(assay(seqfish_data_sample1, "logcounts")) # extract matrix of gene expressions
-sample1_coords = spatialCoords(seqfish_data_sample1)[,1:2] %>% as.data.frame()
-colnames(sample1_coords) = c("x", "y")
-
-seqfish_data_sample2 <- LohoffSeqFISHData(samples = c(3,4))
-seqfish_data_sample2 = logNormCounts(seqfish_data_sample2)
-rownames(seqfish_data_sample2) <- rowData(seqfish_data_sample2)[,"SYMBOL"]
-sample2_exprs = t(assay(seqfish_data_sample2, "logcounts"))
-sample2_coords = spatialCoords(seqfish_data_sample2)[,1:2] %>% as.data.frame()
-colnames(sample2_coords) = c("x", "y")
-
-seqfish_data_sample3 <- LohoffSeqFISHData(samples = c(5,6))
-seqfish_data_sample3 = logNormCounts(seqfish_data_sample3)
-rownames(seqfish_data_sample3) <- rowData(seqfish_data_sample3)[,"SYMBOL"]
-sample3_exprs = t(assay(seqfish_data_sample3, "logcounts"))
-sample3_coords = spatialCoords(seqfish_data_sample3)[,1:2] %>% as.data.frame()
-colnames(sample3_coords) = c("x", "y")
-
-keep1 = sample(c(TRUE, FALSE), nrow(sample1_exprs), replace = TRUE, prob = c(0.2, 0.8))
-keep2 = sample(c(TRUE, FALSE), nrow(sample2_exprs), replace = TRUE, prob = c(0.2, 0.8))
-keep3 = sample(c(TRUE, FALSE), nrow(sample3_exprs), replace = TRUE, prob = c(0.2, 0.8))
-
-sample1_exprs = sample1_exprs[keep1,]
-sample1_coords = sample1_coords[keep1,]
-sample2_exprs = sample2_exprs[keep2,]
-sample2_coords = sample2_coords[keep2,]
-sample3_exprs = sample3_exprs[keep3,]
-sample3_coords = sample3_coords[keep3,]
-
-sample1_cell_types = seqfish_data_sample1$celltype[keep1]
-sample2_cell_types = seqfish_data_sample2$celltype[keep2]
-sample3_cell_types = seqfish_data_sample3$celltype[keep3]
-
-save(sample1_exprs, sample1_coords, sample1_cell_types, 
-     sample2_exprs, sample2_coords, sample2_cell_types, 
-     sample3_exprs, sample3_coords, sample3_cell_types, 
-     file = "../data/MouseData.rda", compress = "xz")
-

For this vignette and the examples for each function in wSIR, we simply load -this data that has already been saved at data/MouseData.rda.

-
data(MouseData)
-
-
-

3 Supervised dimension reduction with wSIR

-
-

3.1 Parameter Study

-

wSIR contains two parameters, alpha and slices. Parameter slices is the -number of groups along each spatial axis into which we split the observations -in the wSIR algorithm. More slices means we could pick up more spatial -information in the gene expression data, but we risk overfitting on the -training set. Parameter alpha modifies the strength of the weight matrix. -alpha = 0 is equivalent to no spatial weighting, meaning the weight matrix -becomes the identity matrix. This is equivalent to SIR. Larger alpha values -means there is more weight given to spatial correlation rather than gene -expression differences alone in the computation of the wSIR directions.

-

The function exploreWSIRParams performs wSIR over many choices of alpha and -slices to identify the most appropriate parameters moving forward. Here, we -will use sample 1 only.

-

These parameters should both be tuned over some reasonable values. The function -exploreWSIRParams computes and visualises the performance of wSIR for all -given combinations of slices and alpha. The performance is computed -from the following procedure: -1) For each combination of slices and alpha, split the data into 50% -train and 50% test (where train and test halves both include gene expression -data and cell coordinates). -2) Perform wSIR on the training set using the current combination of slices -and alpha. -3) Project the gene expression data of the testing set into low-dimensional -space using the wSIR results from step 2. -4) Evaluate wSIR’s performance by computing either the distance correlation -(“DC”, default), or the correlation of distances (“CD”) between the projected gene -expression data of the test set and the test coordinates, according to parameter -metric. -5) Repeat steps 2-4 until it has been done nrep times (nrep is a parameter -whose default is 5). Calculate the average metric value over each of the nrep -iterations for this combination of slices and alpha. -6) Repeat steps 1-5 with all other combinations of slices and alpha to -obtain an average metric value for each combination. -7) Return the combination of slices and alpha with the highest average -metric value and display a plot showing the performance for every combination -of parameters.

-

Note: a key advantage of wSIR over SIR is parameter robustness. Here, we can -see that SIR’sperformance deteriorates as you use larger values for slices. -However, wSIR’s performance is relatively stable as you vary both slices and -alpha across reasonable values (e.g. among the default values we optimise -over). In the following plot, the metric value becomes smaller as we increase -the number of slices for alpha = 0 (which corresponds to SIR). Performance is -stable for all non-zero alpha and slice combinations (which correspond to wSIR).

-
optim_obj = exploreWSIRParams(exprs = sample1_exprs, 
-                              coords = sample1_coords, 
-                              alpha_vals = c(0,2,4,8), 
-                              slice_vals = c(5,10,15))
-
## Time difference of 2.584874 secs
-## Time difference of 2.822482 secs
-## Time difference of 3.072535 secs
-## Time difference of 1.956654 secs
-## Time difference of 2.474102 secs
-## Time difference of 2.875094 secs
-## Time difference of 2.06729 secs
-## Time difference of 2.282061 secs
-## Time difference of 2.884527 secs
-## Time difference of 2.02782 secs
-## Time difference of 2.166223 secs
-## Time difference of 2.614765 secs
-
optim_obj$plot
-

-
-
-

3.2 wSIR Computation

-

We next perform wSIR using the optimal parameter combination that we found in -the previous section. We use the gene expression matrix and spatial coordinates -from sample 1 here. This returns a list of results with 5 (named) slots, whose -details can be found at ?wSIR::wSIR.

-
wsir_obj = wSIR(X = sample1_exprs, 
-                coords = sample1_coords, 
-                slices = optim_obj$best_slices, 
-                alpha = optim_obj$best_alpha,
-                optim_params = FALSE)
-
-names(wsir_obj)
-
## [1] "scores"     "directions" "estd"       "W"          "evalues"
-
-
-
-

4 wSIR Results Analysis

-

In this section, we will demonstrate how to use the built-in analysis functions -to better understand how wSIR creates a spatially-informed low-dimensional -embedding. These functions all use a wSIR result as an input. Here, we use the -output from the previous section, meaning we are studying the result -of performing wSIR on sample 1 only.

-
-

4.1 wSIR Top Genes

-

The findTopGenes function finds and plots the genes with highest loading in -the specified wSIR directions (default is direction 1). If a gene has high -loading (in terms of magnitude), it is more important to the wSIR direction. -Since the wSIR directions are designed to retain information about each cell’s -spatial position, the genes with high loading should be spatially-related genes.

-

In the plot below, we can see which genes have the highest loading in wSIR -direction 1. This is useful as it gives us an intuition about how wSIR creates -the low-dimensional embedding. We can see that some of the genes are known -spatial genes (e.g. Cdx2, Hox-), which is what we would expect to see.

-

We can simultaneously plot top genes for multiple directions, and utilise the

-
top_genes_obj = findTopGenes(WSIR = wsir_obj, highest = 8) # create top genes object
-top_genes_plot = top_genes_obj$plot # select plot
-top_genes_plot # print plot
-

-
top_genes_obj = findTopGenes(WSIR = wsir_obj, highest = 8, dirs = 2:4)
-top_genes_plot = top_genes_obj$plot
-top_genes_plot
-

-
-
-

4.2 Visualising wSIR Scores

-

The visualiseWSIRDirections function plots each cell at its spatial position, -coloured by its value for each of the specified wSIR columns. This gives us an -understanding of what each column of the low-dimensional embedding represents.

-

Below, we visualise the cells at their spatial positions, coloured by each of -the 5 wSIR directions The top left plot illustrates how, for this example, -wSIR direction 1 captures information about the “y” spatial axis, since cells -with higher “y” coordinate have low wSIR1 value, while cells with lower “y” -coordinate have higher wSIR1 value. wSIR2 is shown in the next plot over -(the one titled “2”), and we can see that wSIR column two appears to capture -information about the “x” spatial coordinate. The remaining three wSIR columns -all contain information about cell types, which we can tell by the regions of -high and low wSIR column values spread across the tissue.

-
vis_obj = visualiseWSIRDirections(coords = sample1_coords, WSIR = wsir_obj, dirs = 8) # create visualisations
-vis_obj
-

-
-
-

4.3 UMAP on low-dimensional embedding

-

The two functions generateUmapFromWSIR and plotUmapFromWSIR create and -display UMAP dimension reduction calculated on the wSIR low-dimensional -embedding. We can colour the UMAP plot (where each point represents a cell) by -its value for various genes of interest. This visualises the structure of the -wSIR dimension reduction space, which is useful to gain more intuition about -what the space represents. Specifically, we can see if the wSIR space contains -neighbourhoods of high expression for specific genes, thus better understanding -how this space is made.

-

To specify which genes we would like to include, we can use the output from -the findTopGenes function from above, which finds spatially-related genes by -ranking those with the highest loading in relevant wSIR directions. This output -is then the value for the highest_genes parameter. Otherwise, we could also -specify our own genes of interest if there are some specific genes we would -like to visualise. For example, if we wanted to visualise the expression -distribution for Cdx2 and Hoxb4, we could use genes = c("Cdx2", "Hoxb4") as -an argument in plotUmapFromWSIR (and leave highest_genes blank).

-

Below, we use the UMAP function to visualise the wSIR space computed on the -gene expression data from sample 1. We colour each cell by their values for -the 6 genes with highest value in wSIR direction 1 (as found by the -findTopGenes function previously). We can see that for some of these genes, -there are specific regions of high expression in the UMAP plots, suggesting -that the wSIR space separates cells based on their expression for those genes.

-
umap_coords = generateUmapFromWSIR(WSIR = wsir_obj)
-umap_plots = plotUmapFromWSIR(exprs = sample1_exprs,
-                              umap_coords = umap_coords,
-                              highest_genes = top_genes_obj,
-                              n_genes = 6)
-umap_plots
-

-
-
-
-

5 Projection of new data with wSIR

-

A key functionality of the wSIR package is the ability to project new -single-cell data into the wSIR low-dimensional space. This will allow for a -low-dimensional representation of gene expression data that contains -information about each cell’s spatial position even though we do not have -access to the spatial coordinates for this new data. This low-dimensional wSIR -embedding would be especially useful for downstream applications, like spatial -alignment or spatial clustering (where we don’t have spatial coordinates).

-

Here, we will demonstrate the steps for that, as well as a specific application.

-

For each projection example, we will perform wSIR on a spatial transcriptomics -dataset which includes gene expression data and spatial coordinates. We will -then project a “single-cell” dataset, which only contains the gene expression -matrix, into wSIR low-dimensional space.

-
-

5.1 Single-sample spatial dataset, single-sample single-cell dataset

-

Here, we demonstrate the steps to project a new sample single-cell dataset into -wSIR low-dimensional space having already performed wSIR on the spatial -transcriptomics dataset from the first sample.

-

We have already performed wSIR on sample 1, so here we project sample 2’s gene -expression matrix into the wSIR low-dimensional space, which will therefore -have an ability to predict sample 2’s (unknown at this stage) spatial locations.

-
sample2_low_dim_exprs = projectWSIR(wsir = wsir_obj, newdata = sample2_exprs)
-

Check the dimension of sample 2’s low-dimensional gene expression data:

-
dim(sample2_low_dim_exprs)
-
## [1] 2986    9
-

Observe some of sample 2’s low-dimensional gene expression data:

-
head(sample2_low_dim_exprs)
-
##           [,1]       [,2]        [,3]         [,4]       [,5]        [,6]
-## [1,] 1.2044363  0.3790739  1.72818610  1.797653672 -0.8684785 -0.74469382
-## [2,] 0.5738578 -0.4719606 -0.26341538  0.341280096 -0.4274574  0.34500227
-## [3,] 1.2749872  0.8741444  1.34429322 -0.009172746  0.4562988  0.42206043
-## [4,] 1.7326508  0.3322124  0.55148621 -0.732668048 -0.2272738  0.30675261
-## [5,] 2.1501634  0.8665235 -0.04544417 -0.779635209 -0.4277316  0.17938548
-## [6,] 1.2074795  1.1035173 -0.51632770 -1.048309253 -1.0852001 -0.01835717
-##          [,7]      [,8]       [,9]
-## [1,] 3.437305 0.5718475 -0.4848100
-## [2,] 1.304516 1.7813543  0.6194540
-## [3,] 2.313171 0.0540894 -0.3177639
-## [4,] 2.269962 0.5799911 -1.2385839
-## [5,] 0.899609 1.5397581  0.1789930
-## [6,] 1.244870 0.5720243  0.4845666
-

This low-dimensional gene expression data can then be used for any later tasks -which would benefit from a low-dimensional embedding of the gene expression -data for all the samples, rather than just the gene expression data.

-
-
-

5.2 Multi-sample spatial dataset, single-sample single-cell dataset

-

Here, we perform wSIR on samples 1 and 2 together. This requires the gene -expression matrices from both samples joined together into one matrix, and -the coords dataframes joined into one dataframe. We do this concatenation -using rbind. We then specify the sample IDs for each row in the joined -expression matrix and coordinates dataframe using the samples argument. This -is a vector with a “1” for each row in sample 1 and a “2” for each row in -sample 2.

-

We use the resulting wSIR output to project the gene expression data from -sample 3 into low-dimensional space. We check the dimension of the resulting -matrix.

-
wsir_obj_samples12 <- wSIR(X = rbind(sample1_exprs, sample2_exprs),
-                           coords = rbind(sample1_coords, sample2_coords),
-                           samples = c(rep(1, nrow(sample1_coords)), rep(2, nrow(sample2_coords))),
-                           slices = optim_obj$best_slices, 
-                           alpha = optim_obj$best_alpha,
-                           optim_params = FALSE)
-
-sample3_low_dim_exprs <- projectWSIR(wsir = wsir_obj_samples12, newdata = sample3_exprs)
-dim(sample3_low_dim_exprs)
-
## [1] 4607    7
-

This low-dimensional matrix can then be used for downstream tasks which would -benefit from a low-dimensional embedding of sample 3’s gene expression matrix -that contains information about each cell’s location. Examples for downstream -use include spatial alignment of single-cell and spatial gene expression data -via Tangram, using the -wSIR scores as the input rather than the (unreduced) gene expression matrix.

-

An example of a very simple application is using the wSIR scores as an input to a KNN cell type classification algorithm. This is demonstrated below, using the ‘knn’ function from ‘class’ package.

-
samples12_cell_types = append(sample1_cell_types, sample2_cell_types)
-
-knn_classification_object = knn(train = wsir_obj_samples12$scores, 
-                                test = sample3_low_dim_exprs,
-                                cl = samples12_cell_types,
-                                k = 10)
-
-tail(knn_classification_object)
-
## [1] Presomitic mesoderm Dermomyotome        Presomitic mesoderm
-## [4] Spinal cord         Spinal cord         Low quality        
-## 24 Levels: Allantois Anterior somitic tissues ... Surface ectoderm
-

In the above code, we use the wSIR scores as input for a simple KNN-based -cell type classification tool. We print the tail of the prediction vector, -demonstrating how we can use the wSIR-based low-dimensional gene expression -data from a new sample as a step in a realistic analysis pipeline.

-
- -Session Info - -
sessionInfo()
-
## R version 4.3.0 (2023-04-21)
-## Platform: x86_64-apple-darwin20 (64-bit)
-## Running under: macOS 14.5
-## 
-## Matrix products: default
-## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
-## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
-## 
-## locale:
-## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
-## 
-## time zone: Australia/Sydney
-## tzcode source: internal
-## 
-## attached base packages:
-## [1] stats     graphics  grDevices utils     datasets  methods   base     
-## 
-## other attached packages:
-##  [1] class_7.3-22       umap_0.2.10.0      vctrs_0.6.3        doBy_4.6.22       
-##  [5] Rfast_2.0.8        RcppZiggurat_0.1.6 Rcpp_1.0.11        ggplot2_3.4.2     
-##  [9] magrittr_2.0.3     wSIR_0.1.2         BiocStyle_2.30.0  
-## 
-## loaded via a namespace (and not attached):
-##  [1] sass_0.4.7            utf8_1.2.3            generics_0.1.3       
-##  [4] tidyr_1.3.0           lattice_0.21-8        digest_0.6.33        
-##  [7] evaluate_0.21         grid_4.3.0            bookdown_0.34        
-## [10] fastmap_1.1.1         jsonlite_1.8.7        Matrix_1.6-1.1       
-## [13] RSpectra_0.16-2       backports_1.4.1       BiocManager_1.30.23  
-## [16] purrr_1.0.1           fansi_1.0.4           scales_1.2.1         
-## [19] modelr_0.1.11         microbenchmark_1.4.10 jquerylib_0.1.4      
-## [22] cli_3.6.1             rlang_1.1.1           cowplot_1.1.1        
-## [25] munsell_0.5.0         withr_2.5.0           cachem_1.0.8         
-## [28] yaml_2.3.7            tools_4.3.0           parallel_4.3.0       
-## [31] dplyr_1.1.2           colorspace_2.1-0      boot_1.3-28.1        
-## [34] Deriv_4.1.3           reticulate_1.37.0     broom_1.0.5          
-## [37] png_0.1-8             R6_2.5.1              magick_2.7.4         
-## [40] lifecycle_1.0.3       MASS_7.3-60           pkgconfig_2.0.3      
-## [43] pillar_1.9.0          bslib_0.5.0           gtable_0.3.3         
-## [46] glue_1.6.2            highr_0.10            xfun_0.39            
-## [49] tibble_3.2.1          tidyselect_1.2.0      rstudioapi_0.15.0    
-## [52] knitr_1.43            farver_2.1.1          htmltools_0.5.5      
-## [55] labeling_0.4.2        rmarkdown_2.23        compiler_4.3.0       
-## [58] askpass_1.1           openssl_2.1.0
-
-
-
- - - -
-
- -
- - - - - - - - - - - - - - - - - - -