diff --git a/CITATION.cff b/CITATION.cff index 07c10d6..68f9635 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,6 +1,6 @@ cff-version: 1.2.0 title: vertesy/Seurat.utils -version: v2.8.0 +version: v2.8.5 message: >- If you use this software, please cite it using these metadata. type: software diff --git a/DESCRIPTION b/DESCRIPTION index ad64f8c..0dd42b1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat.utils Title: Seurat.utils - utility functions for Seurat -Version: 2.8.0 +Version: 2.8.5 Authors@R: person("Abel", "Vertesy", , "av@imba.oeaw.ac.at", role = c("aut", "cre")) Author: Abel Vertesy [aut, cre] @@ -14,21 +14,26 @@ BugReports: https://github.com/vertesy/Seurat.utils/issues Depends: CodeAndRoll2, ggExpress, - ggplot2, magrittr, Seurat, - Stringendo + Stringendo, + tidyverse Imports: + checkmate, cowplot, dplyr, + EnhancedVolcano, foreach, + fs, + future, ggcorrplot, + ggplot2, ggpubr, ggrepel, - grDevices, + gplots, + harmony, HGNChelper, htmlwidgets, - job, MarkdownHelpers, MarkdownReports, Matrix, @@ -38,27 +43,24 @@ Imports: princurve, qs, R.utils, + RColorBrewer, readr, ReadWriter, reshape2, rstudioapi, scales, + SeuratObject, SoupX, sparseMatrixStats, stringr, tibble, - tictoc, - tidyverse, - vroom + tictoc Suggests: + clusterProfiler, DatabaseLinke.R, - EnhancedVolcano, - fs, - gplots, - princurve, - RColorBrewer, - SoupX + enrichplot, + vroom Encoding: UTF-8 -Packaged: 2024-06-21 12:31:42.585143 +Packaged: 2024-10-23 22:59:23.332565 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.1 diff --git a/Development/Create_the_Seurat.utils_Package.R b/Development/Create_the_Seurat.utils_Package.R index fb0944c..c646c75 100644 --- a/Development/Create_the_Seurat.utils_Package.R +++ b/Development/Create_the_Seurat.utils_Package.R @@ -1,7 +1,8 @@ ###################################################################################################### # Create_the_Seurat.utils_Package.R ###################################################################################################### -# source("~/GitHub/Packages/Seurat.utils/Development/Create_the_Seurat.utils_Package.R") +# file.edit("~/GitHub/Packages/Seurat.utils/Development/Create_the_Seurat.utils_Package.R") + # rm(list = ls(all.names = TRUE)); try(dev.off(), silent = TRUE) @@ -20,8 +21,11 @@ file.edit(config.path) source(config.path) package.name <- DESCRIPTION$'package.name' + + PackageTools::document_and_create_package(repository.dir, config_file = 'config.R') 'git add commit push to remote' +file.edit("DESCRIPTION") # Install your package ------------------------------------------------ "disable rprofile by" @@ -47,11 +51,13 @@ devtools::install_github(repo = "vertesy/Seurat.utils", upgrade = F) # BiocManager::install("MatrixGenerics") # CMD CHECK ------------------------------------------------ +devtools::check_man(repository.dir) checkres <- devtools::check(repository.dir, cran = FALSE) # Automated Codebase linting to tidyverse style ------------------------------------------------ styler::style_pkg(repository.dir) +# styler::style_file("~/GitHub/Packages/Seurat.utils/R/Seurat.Utils.Visualization.R") # Extract package dependencies ------------------------------------------------ @@ -69,7 +75,7 @@ PackageTools::extract_package_dependencies(repository.dir) # Try to find and add missing @importFrom statements------------------------------------------------ devtools::load_all("~/GitHub/Packages/PackageTools/") -(ls.scripts.full.path <- list.files(file.path(repository.dir, "R"), full.names = T)) +(ls.scripts.full.path <- list.files(file.path(repository.dir, "R"), full.names = T, pattern = ".R$")) if (F) { (excluded.packages <- unlist(strsplit(DESCRIPTION$'depends', split = ", "))) for (scriptX in ls.scripts.full.path) { @@ -78,6 +84,7 @@ if (F) { } # Replaces T with TRUE and F with FALSE ------------------------------------------------ +(ls.scripts.full.path <- list.files(file.path(repository.dir, "R"), full.names = T, pattern = ".R$")) for (scriptX in ls.scripts.full.path) { PackageTools::replace_tf_with_true_false(scriptX, strict_mode = F) PackageTools::replace_short_calls(scriptX, strict_mode = F) diff --git a/Development/config.R b/Development/config.R index 80eaa19..129d4e7 100644 --- a/Development/config.R +++ b/Development/config.R @@ -1,19 +1,21 @@ # Configuration for the Package DESCRIPTION <- list( package.name = "Seurat.utils", - version = "2.8.0", + version = "2.8.5", title = "Seurat.utils - utility functions for Seurat", description = "Seurat.utils Is a collection of utility functions for Seurat single cell analysis. Functions allow 3D plotting, visualisation of statistics & QC, the automation / multiplexing of plotting, interaction with the Seurat object, etc. Some functionalities require functions from CodeAndRoll and MarkdownReports libraries.", - depends = "ggplot2, Seurat, Stringendo, CodeAndRoll2, ggExpress, magrittr", - imports = "cowplot, ReadWriter, dplyr, ggcorrplot, ggpubr, ggrepel, grDevices, HGNChelper, - htmlwidgets, MarkdownHelpers, MarkdownReports, Matrix, matrixStats, princurve, pheatmap, + depends = "tidyverse, Seurat, Stringendo, CodeAndRoll2, ggExpress, magrittr", + imports = "cowplot, dplyr, ggcorrplot, ggpubr, ggrepel, HGNChelper, htmlwidgets, + Matrix, matrixStats, princurve, pheatmap, R.utils, readr, reshape2, scales, SoupX, sparseMatrixStats, stringr, tibble, tictoc, - plotly, rstudioapi, vroom, job, qs, foreach, tidyverse", - suggests = "SoupX, princurve, EnhancedVolcano, DatabaseLinke.R, RColorBrewer, fs, gplots", - + ReadWriter, MarkdownHelpers, MarkdownReports, + plotly, qs, foreach, harmony, EnhancedVolcano, rstudioapi, + RColorBrewer, SeuratObject, checkmate, fs, future, + ggplot2, gplots", # , grDevices + suggests = "clusterProfiler, enrichplot, DatabaseLinke.R, vroom", #, job author.given = "Abel", author.family = "Vertesy", author.email = "av@imba.oeaw.ac.at", diff --git a/NAMESPACE b/NAMESPACE index e1d0321..b74c461 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -193,17 +193,14 @@ export(ww.get.1st.Seur.element) export(xread) export(xsave) import(Seurat) -import(enrichplot) import(tidyverse) importFrom(CodeAndRoll2,as_tibble_from_namedVec) importFrom(CodeAndRoll2,clip.outliers.at.percentile) importFrom(CodeAndRoll2,df.col.2.named.vector) importFrom(CodeAndRoll2,grepv) importFrom(CodeAndRoll2,translate) -importFrom(DatabaseLinke.R,qHGNC) importFrom(EnhancedVolcano,EnhancedVolcano) importFrom(HGNChelper,checkGeneSymbols) -importFrom(MarkdownHelpers,color_check) importFrom(MarkdownHelpers,llogit) importFrom(MarkdownHelpers,llprint) importFrom(MarkdownHelpers,ww.assign_to_global) @@ -249,7 +246,6 @@ importFrom(Seurat,ScaleData) importFrom(Seurat,TransferData) importFrom(Seurat,VariableFeatures) importFrom(SeuratObject,UpdateSeuratObject) -importFrom(SoupX,SoupChannel) importFrom(Stringendo,iprint) importFrom(Stringendo,kollapse) importFrom(Stringendo,kpp) @@ -258,7 +254,6 @@ importFrom(Stringendo,percentage_formatter) importFrom(checkmate,assert_character) importFrom(checkmate,assert_class) importFrom(checkmate,assert_string) -importFrom(clusterProfiler,enrichGO) importFrom(cowplot,ggdraw) importFrom(cowplot,ggsave2) importFrom(cowplot,plot_grid) @@ -314,8 +309,8 @@ importFrom(ggrepel,geom_text_repel) importFrom(gplots,rich.colors) importFrom(graphics,par) importFrom(graphics,text) +importFrom(harmony,RunHarmony) importFrom(htmlwidgets,saveWidget) -importFrom(job,job) importFrom(matrixStats,rowSums2) importFrom(pheatmap,pheatmap) importFrom(plotly,layout) @@ -341,4 +336,3 @@ importFrom(tibble,rownames_to_column) importFrom(tictoc,tic) importFrom(tictoc,toc) importFrom(tools,toTitleCase) -importFrom(vroom,vroom) diff --git a/R/Seurat.Utils.Metadata.R b/R/Seurat.Utils.Metadata.R index a475b2f..8887bde 100644 --- a/R/Seurat.Utils.Metadata.R +++ b/R/Seurat.Utils.Metadata.R @@ -40,16 +40,18 @@ addTranslatedMetadata <- function(obj = combined.obj, ...) { # Input assertions stopifnot(is(obj, "Seurat"), - is.character(orig.ident) && length(orig.ident) == 1, - is.character(suffix) | is.null(suffix), - "Not a named vec was provided!" = !is.null(names(translation_as_named_vec)) + is.character(orig.ident) && length(orig.ident) == 1, + is.character(suffix) | is.null(suffix), + "Not a named vec was provided!" = !is.null(names(translation_as_named_vec)) ) message("new_col_name: ", new_col_name) # Translate metadata - obj@meta.data[[new_col_name]] <- CodeAndRoll2::translate(vec = as.character(obj@meta.data[[orig.ident]]), - old = names(translation_as_named_vec), - new = translation_as_named_vec) + obj@meta.data[[new_col_name]] <- CodeAndRoll2::translate( + vec = as.character(obj@meta.data[[orig.ident]]), + old = names(translation_as_named_vec), + new = translation_as_named_vec + ) # message("new_col_name: ", new_col_name) # clUMAP(ident = new_col_name, obj = obj, caption = NULL, ...) @@ -112,7 +114,7 @@ metaColnameExists <- function(col_name, obj = combined.obj) { #' @description Retrieves a specified metadata column from a Seurat object and returns it as a named vector. #' @param col A string specifying the name of the metadata column to be retrieved. Default: 'batch'. #' @param obj A Seurat object from which the metadata column will be retrieved. Default: combined.obj. -#' @param as_numeric A logical flag indicating whether the returned values should be converted to numeric format. Default: FALSE (FALSE). +#' @param as_numeric A logical flag indicating whether the returned values should be converted to numeric format. Default: `FALSE`. (FALSE). #' @return A named vector containing the values from the specified metadata column. If 'as_numeric' is TRUE, the values are converted to numeric format. #' @examples #' \dontrun{ @@ -244,7 +246,7 @@ calculateAverageMetaData <- function( #' Default: c("AAV.detected.min2", "AAV.detected"). #' @param match.values A named vector where names correspond to `meta.features` and values are #' the strings to match against. Default: c("AAV.detected.min2" = "AAV", "AAV.detected" = "AAV"). -#' @param verbose A logical value indicating whether to print detailed output. Default: TRUE. +#' @param verbose A logical value indicating whether to print detailed output. Default: `TRUE`. #' @param max.categ The maximum number of categories allowed before stopping. Default: 30. #' #' @return A data frame with the category as the first column and the subsequent columns showing @@ -327,9 +329,9 @@ getMedianMetric.lsObj <- function(ls.obj = ls.Seurat, n.datasets = length(ls.Seu #' #' @description Retrieves cell IDs from a specified metadata column of a Seurat object, where the cell ID matches a provided list of values. The matching operation uses the `%in%` operator. #' @param ident A string specifying the name of the metadata column from which to retrieve cell IDs. Default: 'res.0.6'. -#' @param ident_values A vector of values to match in the metadata column. Default: NA. +#' @param ident_values A vector of values to match in the metadata column. Default: `NA`. #' @param obj The Seurat object from which to retrieve the cell IDs. Default: combined.obj. -#' @param inverse A boolean value indicating whether to inverse the match, i.e., retrieve cell IDs that do not match the provided list of ident_values. Default: FALSE. +#' @param inverse A boolean value indicating whether to inverse the match, i.e., retrieve cell IDs that do not match the provided list of ident_values. Default: `FALSE`. #' @return A vector of cell IDs that match (or don't match, if `inverse = TRUE`) the provided list of values. #' @examples #' \dontrun{ @@ -371,9 +373,8 @@ getCellIDs.from.meta <- function(ident = GetClusteringRuns()[1], #' @return Modified Seurat object with additional metadata. #' @importFrom Seurat AddMetaData #' @export -addMetaDataSafe <- function(obj, metadata, col.name, overwrite = FALSE, verbose = F, +addMetaDataSafe <- function(obj, metadata, col.name, overwrite = FALSE, verbose = FALSE, strict = TRUE) { - if (verbose) message("Running addMetaDataSafe...") # browser() stopifnot( @@ -381,14 +382,14 @@ addMetaDataSafe <- function(obj, metadata, col.name, overwrite = FALSE, verbose "Column already exists" = ((!col.name %in% colnames(obj@meta.data)) | overwrite) ) equal_length <- length(metadata) == ncol(obj) - iprint('strict', strict) - if(strict) stopifnot("Metadata or object too short" = equal_length) + iprint("strict", strict) + if (strict) stopifnot("Metadata or object too short" = equal_length) if (!is.null(names(metadata))) { - if(verbose) print(head(names(metadata))) - if(verbose) print(head(colnames(obj))) - if(strict) stopifnot(names(metadata) == colnames(obj)) + if (verbose) print(head(names(metadata))) + if (verbose) print(head(colnames(obj))) + if (strict) stopifnot(names(metadata) == colnames(obj)) } else { message("No CBCs associated with new metadata. Assuming exact match.") if (!equal_length) stop("Not equal lenght, no CBCs") @@ -433,17 +434,17 @@ addMetaDataSafe <- function(obj, metadata, col.name, overwrite = FALSE, verbose #' @export create.metadata.vector <- function(vec, obj = combined.obj, fill = NA, min.intersect = min(length(vec), ncol(obj), 100)) { - - stopifnot(is.vector(vec), is(obj, "Seurat") ) + stopifnot(is.vector(vec), is(obj, "Seurat")) cells_in_obj <- colnames(obj) cells_in_vec <- names(vec) stopifnot(is.character(cells_in_obj), is.character(cells_in_vec)) cells_in_both <- length(intersect(cells_in_vec, cells_in_obj)) - if(cells_in_both < min.intersect) { - message(ncol(obj), " cells in obj: ", kppc(cells_in_obj[1:3]) ,"\n", - length(vec), " cells in vec: ", kppc(cells_in_vec[1:3]) ,"\n", - cells_in_both, " cells in both.\n" + if (cells_in_both < min.intersect) { + message( + ncol(obj), " cells in obj: ", kppc(cells_in_obj[1:3]), "\n", + length(vec), " cells in vec: ", kppc(cells_in_vec[1:3]), "\n", + cells_in_both, " cells in both.\n" ) stop("Intersection between vec and obj is less than min.intersect.") } @@ -475,8 +476,8 @@ create.metadata.vector <- function(vec, obj = combined.obj, fill = NA, #' @param gene.symbol.pattern Regular expression pattern to match gene symbols. Default: c("^MT\\.|^MT-", FALSE)[1] #' @param assay Name of the assay to be used. Default: 'RNA' #' @param layer Name of the layer to be used. Default: 'data' -#' @param gene.set A set of gene symbols. If specified, it will be used instead of gene.symbol.pattern. Default: FALSE -#' @param verbose Logical indicating whether to display detailed messages (TRUE) or not (FALSE). Default: TRUE +#' @param gene.set A set of gene symbols. If specified, it will be used instead of gene.symbol.pattern. Default: `FALSE`. +#' @param verbose Logical indicating whether to display detailed messages (TRUE) or not (FALSE). Default: `TRUE`. #' @examples #' \dontrun{ #' if (interactive()) { @@ -549,7 +550,7 @@ addMetaFraction <- function( #' @param obj A Seurat object to be updated. Default: None. #' @param gene_fractions A named list containing gene symbol patterns for each meta column name. #' Default: List of predefined gene fractions. -#' @param add_hga A logical value indicating whether to add percent.HGA meta data. Default: TRUE. +#' @param add_hga A logical value indicating whether to add percent.HGA meta data. Default: `TRUE`. #' #' @return An updated Seurat object. #' @export @@ -557,23 +558,25 @@ addMetaFraction <- function( #' @importFrom SeuratObject UpdateSeuratObject #' addGeneClassFractions <- function(obj, - gene_fractions = list( - "percent.mito" = "^MT\\.|^MT-", - "percent.ribo" = "^RPL|^RPS", - "percent.AC.GenBank" = "^AC[0-9]{6}\\.", - "percent.AL.EMBL" = "^AL[0-9]{6}\\.", - "percent.LINC" = "^LINC0", - "percent.MALAT1" = "^MALAT1" - ), - add_hga = TRUE) { + gene_fractions = list( + "percent.mito" = "^MT\\.|^MT-", + "percent.ribo" = "^RPL|^RPS", + "percent.AC.GenBank" = "^AC[0-9]{6}\\.", + "percent.AL.EMBL" = "^AL[0-9]{6}\\.", + "percent.LINC" = "^LINC0", + "percent.MALAT1" = "^MALAT1" + ), + add_hga = TRUE) { message("Adding meta data for gene-class fractions, e.g., percent.mito, etc.") for (col_name in names(gene_fractions)) { if (!metaColnameExists(col_name = col_name, obj = obj)) { gene_data <- gene_fractions[[col_name]] message("Adding ", col_name, "to @meta.data...", gene_data) - obj <- addMetaFraction(col.name = col_name, gene.symbol.pattern = gene_data, obj = obj, - assay = "RNA") + obj <- addMetaFraction( + col.name = col_name, gene.symbol.pattern = gene_data, obj = obj, + assay = "RNA" + ) } else { message(paste(col_name, "already present.")) } @@ -877,30 +880,29 @@ transferMetadata <- function(from, to, ) } - if(strict) { - stopifnot("There are cells ONLY present in the destination object. Cannot transfer metadata." = (nr.cells.only.to == 0) ) + if (strict) { + stopifnot("There are cells ONLY present in the destination object. Cannot transfer metadata." = (nr.cells.only.to == 0)) } else { warning("There are cells ONLY present in the destination object. Filled with NA", immediate. = TRUE) } - if(nr.cells.only.from > 0 & verbose) warning("There are cells ONLY present in the FROM object. These will be ignored.", immediate. = TRUE) + if (nr.cells.only.from > 0 & verbose) warning("There are cells ONLY present in the FROM object. These will be ignored.", immediate. = TRUE) # Transfer metadata columns _______________________________________________________ for (i in seq_along(colname_from)) { - # Check if to-column exists in destination object OR you overwrite anyway if (!(colname_to[i] %in% colnames(to@meta.data)) || overwrite) { - # Check if column exists in source object if (colname_from[i] %in% colnames(from@meta.data)) { - # Transfer the metadata column # to[[colname_to[i]]] <- from[[colname_from[i]]] metadata_from <- getMetadataColumn(obj = from, col = colname_from[i]) - to <- addMetaDataSafe(obj = to, col.name = colname_to[i], metadata = metadata_from[colnames(to)], - strict = strict) + to <- addMetaDataSafe( + obj = to, col.name = colname_to[i], metadata = metadata_from[colnames(to)], + strict = strict + ) message(sprintf("Transferred '%s' to '%s'.", colname_from[i], colname_to[i])) } else { @@ -918,7 +920,6 @@ transferMetadata <- function(from, to, x <- clUMAP(obj = to, ident = colname_to[i], suffix = "transferred.ident", ...) print(x) } - } # for return(to) @@ -971,21 +972,22 @@ sampleNpc <- function(metaDF = MetaData[which(Pass), ], pc = 0.1) { #' @importFrom dplyr bind_rows select #' @export merge_seurat_metadata <- function(ls_obj, include_cols = NULL, exclude_cols = NULL) { - # Assert that ls_obj is a list and all elements are Seurat objects - stopifnot(is.list(ls_obj), length(ls_obj) > 0, - all(sapply(ls_obj, function(x) inherits(x, "Seurat")))) # Ensure all are Seurat objects + stopifnot( + is.list(ls_obj), length(ls_obj) > 0, + all(sapply(ls_obj, function(x) inherits(x, "Seurat"))) + ) # Ensure all are Seurat objects # Extract metadata from each Seurat object metadata_list <- lapply(ls_obj, function(seurat_obj) seurat_obj@meta.data) # Assert that the number of columns are the same in all metadata col_counts <- sapply(metadata_list, ncol) - stopifnot(length(unique(col_counts)) == 1) # Ensure all metadata have the same number of columns + stopifnot(length(unique(col_counts)) == 1) # Ensure all metadata have the same number of columns # Assert that rownames (cell names) are unique across all metadata cell_names <- unlist(lapply(metadata_list, rownames)) - stopifnot(length(unique(cell_names)) == length(cell_names)) # Ensure all cell names are unique + stopifnot(length(unique(cell_names)) == length(cell_names)) # Ensure all cell names are unique # Optionally select columns to include or exclude if (!is.null(include_cols)) { @@ -1019,6 +1021,9 @@ merge_seurat_metadata <- function(ls_obj, include_cols = NULL, exclude_cols = NU #' @param ls.Obj A list of objects, each containing a `@meta.data` slot. #' @param cols.remove A character vector of column names to be removed from each metadata data frame. #' Default is an empty character vector, meaning no columns will be removed. +#' @param save_as_qs A logical indicating whether to save the merged metadata as a .qs object. +#' @param save_as_tsv A logical indicating whether to save the merged metadata as a .tsv file. +#' @param ... Additional arguments to be passed to `write.table`. #' #' @details #' The function starts by validating the input to ensure it's a list. It then extracts the `@meta.data` @@ -1093,14 +1098,14 @@ writeCombinedMetadataToTsvFromLsObj <- function(ls.Obj, cols.remove = character( #' @param obj The main Seurat object used for calculations. No default value. #' @param cormethod The method to calculate correlations. Can be either "pearson" or "spearman". Default: "pearson". #' @param main The main title for the plot. Default: "Metadata correlations" followed by the correlation method. -#' @param show_numbers Logical, determines if correlation values should be displayed on the plot. Default: FALSE. +#' @param show_numbers Logical, determines if correlation values should be displayed on the plot. Default: `FALSE`. #' @param digits The number of decimal places for displayed correlation values. Default: 1. #' @param suffix A suffix added to the output filename. Default: NULL. -#' @param add_PCA Logical, determines if PCA values should be included in the correlation calculation. Default: TRUE. +#' @param add_PCA Logical, determines if PCA values should be included in the correlation calculation. Default: `TRUE`. #' @param n_PCs The number of PCA components to be included if 'add_PCA' is TRUE. Default: 8. #' @param w The width of the plot in inches. Default: ceiling((length(columns)+n_PCs)/2). #' @param h The height of the plot in inches. Default: the value of w. -#' @param use_ggcorrplot Logical, determines if the ggcorrplot package should be used for plotting. Default: FALSE. +#' @param use_ggcorrplot Logical, determines if the ggcorrplot package should be used for plotting. Default: `FALSE`. #' @param n_cutree The number of clusters to be used in hierarchical clustering. Default: the number of PCs. #' @param ... Additional parameters passed to the internally called ggcorrplot function. #' @@ -1205,12 +1210,13 @@ heatmap_calc_clust_median <- function( w = ceiling(length(variables) / 2), ...) { # Ensure that 'meta' is a dataframe, 'ident' is a column in 'meta', and 'variables' are columns in 'meta' - stopifnot(is.data.frame(meta), - is.character(ident), - is.character(variables), - ident %in% colnames(meta), - all(variables %in% colnames(meta)) - ) + stopifnot( + is.data.frame(meta), + is.character(ident), + is.character(variables), + ident %in% colnames(meta), + all(variables %in% colnames(meta)) + ) # Group by 'ident' and calculate median for each variable df_cluster_medians <- meta |> @@ -1249,7 +1255,7 @@ heatmap_calc_clust_median <- function( pl <- pheatmap::pheatmap(corX, main = paste0("Correlation between ", plot_name), treeheight_row = 2, treeheight_col = 2, - # cluster_cols = F, cluster_rows = F, + # cluster_cols = FALSE, cluster_rows = FALSE, cutree_rows = n_cut_row, cutree_cols = n_cut_col ) @@ -1257,8 +1263,6 @@ heatmap_calc_clust_median <- function( x = pl, width = w, plotname = FixPlotName(make.names(plot_name), suffix, "correlation.pdf") ) - - } } @@ -1273,10 +1277,10 @@ heatmap_calc_clust_median <- function( #' @param group.by The variable to group by for calculations. Default: Second result of GetClusteringRuns(obj). #' @param method Method used for calculations, either "median" or "mean". Default: "median". #' @param min.thr Minimum threshold percentage for a cluster. Default: 2.5. -#' @param return.matrix Logical; if TRUE, returns a matrix. Default: FALSE. +#' @param return.matrix Logical; if TRUE, returns a matrix. Default: `FALSE`. #' @param main Main title for the plot. Default: "read fractions per transcript class and cluster" followed by the method and suffix. #' @param ylab Label for the y-axis. Default: "Fraction of transcriptome (%)". -#' @param percentify Logical. If TRUE, multiplies the fraction by 100. Default: TRUE. +#' @param percentify Logical. If TRUE, multiplies the fraction by 100. Default: `TRUE`. #' @param subt Subtitle for the plot. Default: NULL. #' @param position Position adjustment for geoms. Default: position_stack(). #' @param w The width of the plot. Default: 10. @@ -1355,6 +1359,8 @@ plotMetadataMedianFractionBarplot <- function( #' @param both_pc_and_value If `TRUE`, labels on the pie chart will show both the percentage #' and the count of each category. If `FALSE`, only the percentage is shown. #' @param subtitle Optional subtitle for the pie chart. +#' @param labels Optional labels for the pie chart. +#' #' @param ... Additional arguments to pass to the pie chart plotting function. #' #' @examples @@ -1389,7 +1395,8 @@ plotMetadataCategPie <- function( both_pc_and_value = both_pc_and_value, LegendSide = LegendSide, labels = labels, LegendTitle = "", subtitle = subtitle, - ...) + ... + ) } @@ -1522,7 +1529,6 @@ renameSmallCategories <- function( } - # _________________________________________________________________________________________________ #' @title Transfer labels from a reference Seurat object to a query Seurat object #' @@ -1531,22 +1537,33 @@ renameSmallCategories <- function( #' reference and the combined objects using Uniform Manifold Approximation and Projection (UMAP). #' #' @param query_obj A Seurat object for which the labels are to be transferred. -#' @param reference_path A character string indicating the file path to the reference Seurat object. The path must exist. -#' @param reference_obj Alternative to `reference_path`. If provided, the path is not used to load the reference data. -#' @param named_ident A character string specifying the name of the identity class to be used from the reference Seurat object. Default is 'RNA_snn_res.0.3.ordered.ManualNames'. -#' @param new_ident A character string specifying the name of the new identity class to be created in the query Seurat object. Default is obtained by replacing 'ordered' with 'transferred' in named_ident. -#' @param predictions_col A character string specifying the column in the metadata of the transferred Seurat object containing the transferred labels. Default is 'predicted.id'. -#' @param save_anchors save anchors as RDS file. -#' @param suffix A character string to be used as a suffix in the visualization. Default is 'NEW'. -#' @param plot_suffix A string to added to the UMAP with the new identity. -#' @param h Height for the saved image. Default: 12 -#' @param w Width for the saved image. Default: 9 -#' @param ... Additional arguments passed to the Seurat.utils::clUMAP function. +#' @param reference_obj Alternative to `reference_path`. If provided, the path is not used to load +#' the reference data. +#' @param reference_path A character string indicating the file path to the reference Seurat object. +#' The path must exist. +#' @param reference_ident A character string specifying the name of the identity class to be used +#' from the reference Seurat object. Default is 'RNA_snn_res.0.3.ordered.ManualNames'. +#' @param anchors A list of anchors obtained from the FindTransferAnchors function. If NULL, the +#' @param new_ident A character string specifying the name of the new identity class to be +#' created in the query Seurat object. Default is obtained by replacing 'ordered' with +#' 'transferred' in reference_ident. +#' @param predictions_col A character string specifying the column in the metadata of the transferred +#' Seurat object containing the transferred labels. Default is 'predicted.id'. +#' @param predictions_score A character string specifying the column in the metadata of the transferred +#' Seurat object containing the scores of the transferred labels. Default is 'transferred.score'. +#' @param save_anchors A logical indicating whether to save the anchors as an RDS file. Default is TRUE. +#' @param reference_suffix A character string to be used as in the subtitle of the reference UMAP plot. +#' Default is 'REFERENCE.obj'. +#' @param plot_suffix A character string to be added to the UMAP with the new identity. Default is NULL. +#' @param plot_reference A logical indicating whether to plot the reference UMAP. Default is TRUE. +#' @param h Height for the saved image. Default: `12` +#' @param w Width for the saved image. Default: `9` +#' @param ... Additional arguments passed to the `Seurat.utils::clUMAP` function. #' #' @return The modified query Seurat object with the transferred labels as a new identity class. #' #' @examples -#' # combined.objX <- transferLabelsSeurat(named_ident = 'RNA_snn_res.0.3.ordered.ManualNames', +#' # combined.objX <- transferLabelsSeurat(reference_ident = 'RNA_snn_res.0.3.ordered.ManualNames', #' # reference_obj = reference_obj, #' # query_obj = combined.obj) #' @@ -1589,7 +1606,7 @@ transferLabelsSeurat <- function( # Visualize reference object if (plot_reference) { clUMAP( - obj = reference_obj, ident = reference_ident, label = F, + obj = reference_obj, ident = reference_ident, label = FALSE, suffix = reference_suffix, sub = reference_suffix, w = w, h = h, ... ) @@ -1625,18 +1642,20 @@ transferLabelsSeurat <- function( col.name = predictions_score ) - qSeuViolin(feature = ppp(new_ident, 'score'), ident = new_ident, - sub = Seurat.utils:::.parseBasicObjStats(query_obj), - pt.size = 0.0, obj = query_obj) + qSeuViolin( + feature = ppp(new_ident, "score"), ident = new_ident, + sub = Seurat.utils:::.parseBasicObjStats(query_obj), + pt.size = 0.0, obj = query_obj + ) # Visualize combined object clUMAP( - ident = new_ident, obj = query_obj, suffix = plot_suffix, label = F, + ident = new_ident, obj = query_obj, suffix = plot_suffix, label = FALSE, w = w, h = h, ... ) qUMAP( - feature = predictions_score, obj = query_obj, suffix = plot_suffix, label = F, + feature = predictions_score, obj = query_obj, suffix = plot_suffix, label = FALSE, w = w, h = h, ... ) diff --git a/R/Seurat.Utils.Metadata.R.bac b/R/Seurat.Utils.Metadata.R.bac new file mode 100644 index 0000000..cc27885 --- /dev/null +++ b/R/Seurat.Utils.Metadata.R.bac @@ -0,0 +1,1840 @@ +# ____________________________________________________________________ +# Seurat.Utils.Metadata ---- +# ____________________________________________________________________ +# file.edit("~/GitHub/Packages/Seurat.utils/R/Seurat.Utils.Metadata.R") +# file.edit("~/GitHub/Packages/Seurat.utils/R/Seurat.utils.less.used.R") + +# devtools::load_all(path = '~/GitHub/Packages/Seurat.utils'); +# devtools::document("~/GitHub/Packages/Seurat.utils"); devtools::load_all("~/GitHub/Packages/Seurat.utils") +# source("~/GitHub/Packages/Seurat.utils/R/Seurat.Utils.R") +# source('~/.pack.R') + + + +# _________________________________________________________________________________________________ +# Extract and check metadata columns ______________________________ ---- +# _________________________________________________________________________________________________ + +#' @title Add Translated Metadata to a Seurat Object +#' +#' @description +#' This function translates a specified metadata vector in a Seurat object using a named vector of old +#' and new values and adds it to the Seurat object with a specified suffix. The function also generates +#' UMAP plots for the new metadata. +#' +#' @param obj A Seurat object to be updated. Default: combined.obj. +#' @param orig.ident A character string specifying the original metadata to be translated. Default: "RNA_snn_res.0.4". +#' @param translation_as_named_vec A named vector where names are old values and values are new translations. Default: None. +#' @param suffix A character string specifying the suffix for the new metadata column name. Default: ".". +#' +#' @return An updated Seurat object. +#' +#' @importFrom CodeAndRoll2 translate +#' @export + +addTranslatedMetadata <- function(obj = combined.obj, + orig.ident = "RNA_snn_res.0.4", + translation_as_named_vec, + new_col_name = substitute(translation_as_named_vec), + suffix = NULL, + ...) { + # Input assertions + stopifnot(is(obj, "Seurat"), + is.character(orig.ident) && length(orig.ident) == 1, + is.character(suffix) | is.null(suffix), + "Not a named vec was provided!" = !is.null(names(translation_as_named_vec)) + ) + message("new_col_name: ", new_col_name) + + # Translate metadata + obj@meta.data[[new_col_name]] <- CodeAndRoll2::translate(vec = as.character(obj@meta.data[[orig.ident]]), + old = names(translation_as_named_vec), + new = translation_as_named_vec) + + # message("new_col_name: ", new_col_name) + # clUMAP(ident = new_col_name, obj = obj, caption = NULL, ...) + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title Get Metadata Column Names Matching Pattern +#' +#' @description Retrieves column names from an object's metadata that match a specified pattern. +#' +#' @param pattern A character string containing a regular expression to match against the +#' column names. Default: "RNA". +#' @param obj An object containing a `meta.data` slot, typically from combined datasets. +#' Default: `combined.obj`. +#' +#' @return A character vector of column names matching the pattern. +#' +#' @examples +#' # Assuming `combined.obj` is an object with a meta.data slot +#' getMetaColnames() +#' +#' @export +getMetaColnames <- function(obj = combined.obj, + pattern = "RNA") { + stopifnot(inherits(obj, "Seurat")) + + # Retrieve column names matching the pattern + matchedColnames <- grep(pattern = pattern, x = colnames(obj@meta.data), value = TRUE) + + # Output assertion + if (is.null(matchedColnames)) { + warning("No matching meta data!", immediate. = TRUE) + } else { + message(length(matchedColnames), " columns matching pattern '", pattern, "'.") + } + + dput(matchedColnames) + invisible(matchedColnames) +} + + +# _________________________________________________________________________________________________ +#' @title Check if a Column Exists in the Metadata of an S4 Object +#' +#' @description This function checks whether a given column exists in the meta.data of a Seurat object. +#' @param obj A Seurat object. +#' @param col_name A character string specifying the name of the column. +#' +#' @return A logical value indicating whether the column exists (TRUE) or not (FALSE). +#' @export +metaColnameExists <- function(col_name, obj = combined.obj) { + col_name %in% colnames(obj@meta.data) +} + +# _________________________________________________________________________________________________ +#' @title getMetadataColumn +#' +#' @description Retrieves a specified metadata column from a Seurat object and returns it as a named vector. +#' @param col A string specifying the name of the metadata column to be retrieved. Default: 'batch'. +#' @param obj A Seurat object from which the metadata column will be retrieved. Default: combined.obj. +#' @param as_numeric A logical flag indicating whether the returned values should be converted to numeric format. Default: `FALSE`. (FALSE). +#' @return A named vector containing the values from the specified metadata column. If 'as_numeric' is TRUE, the values are converted to numeric format. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Example usage: +#' batch_metadata <- getMetadataColumn(col = "batch", obj = combined.obj, as_numeric = TRUE) +#' } +#' } +#' @export +getMetadataColumn <- function(col = "batch", obj = combined.obj, as_numeric = FALSE) { + stopifnot(col %in% colnames(obj@meta.data)) + + x <- df.col.2.named.vector(df = obj@meta.data, col = col) + if (as_numeric) { + as.numeric.wNames(x) + 1 + } else { + x + } +} + +# mmeta <- getMetadataColumn + +# _________________________________________________________________________________________________ +#' @title Get Unique Levels of a Seurat Object Ident Slot +#' +#' @description This function extracts the unique levels present in the 'ident' slot of a Seurat object. +#' The function throws an error if the number of levels exceeds 'max_levels'. +#' The function optionally prints the R code to recreate the 'Levels' vector using 'dput'. +#' +#' @param obj A Seurat object. +#' @param ident A character string representing the name of the slot in the Seurat object. +#' @param max_levels An integer that sets the maximum number of levels allowed. Default is 100. +#' @param dput A logical that decides whether to print the R code to recreate the 'Levels' vector. Default is TRUE. +#' +#' @return A vector of unique levels present in the 'ident' slot of the Seurat object. +#' +#' @importFrom tibble deframe +#' @import Seurat +#' @export + +get_levels_seu <- function(obj, ident, max_levels = 100, dput = TRUE) { + Levels <- unique(deframe(obj[[ident]])) + stopifnot(length(Levels) < max_levels) + if (dput) { + cat("Levels <- ") + dput(Levels) + } + return(Levels) +} + + +#' @title Calculate Average Metadata for Seurat Object +#' +#' @description Computes specified metrics (e.g., median, mean) for given metadata features across each category +#' defined by an identity column in a Seurat object's metadata. This function allows for flexible +#' metric calculation on specified features, providing insights into the data distribution. +#' +#' @param obj A Seurat object containing metadata to be analyzed. Defaults to `combined.obj`. +#' @param meta.features A character vector specifying which metadata features to calculate metrics for. +#' Defaults to c("nFeature_RNA", "percent.ribo", "percent.mito"). +#' @param ident The name of the identity column used to group the data before calculating metrics. +#' The default is the second entry from `GetNamedClusteringRuns()`. +#' @param metrics A list of named metrics to calculate for the metadata features, where names are +#' the metric names (e.g., 'median', 'mean') and values are the corresponding functions. +#' Defaults to list('median' = median, 'mean' = mean). +#' @param verbose Logical flag indicating whether to print detailed information about the metrics +#' calculation process. Defaults to TRUE. +#' @param max.categ max number of groups in ident. +#' +#' @return A list containing data frames with calculated metrics for each specified metadata feature, +#' grouped by the identity categories. Each data frame corresponds to one of the specified metrics. +#' +#' @examples +#' # Assuming `combined.obj` is a Seurat object with relevant metadata columns: +#' results <- calculateAverageMetaData( +#' obj = combined.obj, +#' meta.features = c("nFeature_RNA", "nCount_RNA"), +#' metrics = list("median" = median, "mean" = mean), +#' verbose = TRUE +#' ) +#' # This will return a list with data frames containing the median and mean +#' # of "nFeature_RNA" and "percent.ribo" for each category in "ident_column_name". +#' +#' @export +calculateAverageMetaData <- function( + obj = combined.obj, + meta.features = c("nFeature_RNA", "percent.ribo", "percent.mito"), + ident = GetClusteringRuns()[1], + metrics = list("median" = median, "mean" = mean), + verbose = TRUE, max.categ = 30) { + stopifnot( + is(obj, "Seurat"), + "ident not found in object" = ident %in% colnames(obj@meta.data), + "Not all meta.features found in object" = all(meta.features %in% colnames(obj@meta.data)), + length(unique(obj@meta.data[, ident])) < max.categ + ) + + # browser() + # Initialize list to store results + results <- list() + + # Calculate metrics for each meta.feature within each ident category + for (m in names(metrics)) { + results[[m]] <- obj@meta.data |> + group_by(!!sym(ident)) |> + summarise(across(all_of(meta.features), metrics[[m]], na.rm = TRUE), .groups = "drop") + } + + # Verbose output + if (verbose) { + cat("Calculated metrics:", paste(names(metrics), collapse = ", "), "\n") + cat("For features:", paste(meta.features, collapse = ", "), "\n") + cat("Based on identifier:", ident, "\n") + } + return(results) +} + + +# _________________________________________________________________________________________________ +#' @title Calculate the Percentage of Matches per Category +#' +#' @description This function calculates the percentage of matches for specified metadata features +#' against provided match values within each category of an identifier in a Seurat object. +#' +#' @param obj A Seurat object containing the data to be analyzed. Default: combined.obj. +#' @param ident A string specifying the column in the metadata that identifies the categories. +#' Default: first element of `GetClusteringRuns()`. +#' @param meta.features A vector of strings specifying which metadata features to analyze. +#' Default: c("AAV.detected.min2", "AAV.detected"). +#' @param match.values A named vector where names correspond to `meta.features` and values are +#' the strings to match against. Default: c("AAV.detected.min2" = "AAV", "AAV.detected" = "AAV"). +#' @param verbose A logical value indicating whether to print detailed output. Default: `TRUE`.. +#' @param max.categ The maximum number of categories allowed before stopping. Default: 30. +#' +#' @return A data frame with the category as the first column and the subsequent columns showing +#' the percentage of matches for each metadata feature. +#' @export +#' +#' @examples +#' calculatePercentageMatch(obj = combined.obj, ident = "Simple_Celltypes") +calculatePercentageMatch <- function( + obj, + ident = GetClusteringRuns()[1], + meta.features = c("AAV.detected.min2", "AAV.detected"), + match.values = c("AAV.detected.min2" = "AAV", "AAV.detected" = "AAV"), # Named vector for matches + verbose = TRUE, + max.categ = 100) { + # Check for preconditions + stopifnot( + is(obj, "Seurat"), + "ident not found in object" = ident %in% colnames(obj@meta.data), + "Not all meta.features found in object" = all(meta.features %in% colnames(obj@meta.data)), + "Too many categories" = length(unique(obj@meta.data[, ident])) < max.categ, + length(match.values) == length(meta.features), # Check if match.values has the same length as meta.features + all(names(match.values) == meta.features) # Ensure match.values has names corresponding to meta.features + ) + + # Initialize a data frame to store results + results <- data.frame(Category = unique(obj@meta.data[[ident]])) + + # Calculate the percentage of matches for each meta.feature within each ident category + for (feature in meta.features) { + results[[paste0("pct_match_", feature)]] <- sapply(results$Category, function(cat) { + idx <- obj@meta.data[[ident]] == cat + subset_data <- obj@meta.data[idx, feature, drop = TRUE] + pct_match <- mean(subset_data == match.values[feature], na.rm = TRUE) + return(pct_match) + }) + } + + # Verbose output + if (verbose) { + cat("Calculated percentage of matches for values:", paste(match.values, collapse = ", "), "\n") + cat("Corresponding to features:", paste(meta.features, collapse = ", "), "\n") + cat("Based on identifier:", ident, "\n") + } + + return(results) +} + + + +# _________________________________________________________________________________________________ +#' @title getMedianMetric.lsObj +#' +#' @description Get the median values of different columns in meta.data, can iterate over a list of Seurat objects. +#' @param ls.obj List of Seurat objects, Default: ls.Seurat +#' @param n.datasets lenght of list (n objects), Default: length(ls.Seurat) +#' @param mColname Metadata column name to calculate on. Default: 'percent.mito' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' ls.Seurat <- getMedianMetric.lsObj( +#' ls.obj = ls.Seurat, n.datasets = length(ls.Seurat), +#' mColname = "percent.mito" +#' ) +#' } +#' } +#' @export +getMedianMetric.lsObj <- function(ls.obj = ls.Seurat, n.datasets = length(ls.Seurat), mColname = "percent.mito") { + medMetric <- vec.fromNames(names(ls.obj)) + for (i in 1:n.datasets) { + medMetric[i] <- median(ls.obj[[i]]@meta.data[, mColname]) + } + return(medMetric) +} + + + +# _________________________________________________________________________________________________ +#' @title getCellIDs.from.meta +#' +#' @description Retrieves cell IDs from a specified metadata column of a Seurat object, where the cell ID matches a provided list of values. The matching operation uses the `%in%` operator. +#' @param ident A string specifying the name of the metadata column from which to retrieve cell IDs. Default: 'res.0.6'. +#' @param ident_values A vector of values to match in the metadata column. Default: `NA`.. +#' @param obj The Seurat object from which to retrieve the cell IDs. Default: combined.obj. +#' @param inverse A boolean value indicating whether to inverse the match, i.e., retrieve cell IDs that do not match the provided list of ident_values. Default: `FALSE`.. +#' @return A vector of cell IDs that match (or don't match, if `inverse = TRUE`) the provided list of values. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Example usage: +#' getCellIDs.from.meta() +#' } +#' } +#' @export +getCellIDs.from.meta <- function(ident = GetClusteringRuns()[1], + ident_values = NA, obj = combined.obj, + inverse = FALSE) { + # browser() + mdat <- obj@meta.data[, ident] + cells.pass <- mdat %in% ident_values + if (inverse) cells.pass <- !cells.pass + + iprint(sum(cells.pass), "cells found.") + return(rownames(obj@meta.data)[which(cells.pass)]) +} + + +# _________________________________________________________________________________________________ +# Add new metadata ______________________________ ---- +# _________________________________________________________________________________________________ + + +#' @title Add Metadata to a Seurat object, safely with Checks +#' +#' @description Wrapper function for `AddMetaData` that includes additional checks and assertions. +#' +#' @param obj Seurat object to which metadata will be added. +#' @param metadata The metadata to be added. +#' @param col.name The name of the new metadata column. +#' @param overwrite Logical; if TRUE, overwrites the existing column. +#' @param verbose Logical; if TRUE, prints additional information. +#' @param strict ... +#' +#' @return Modified Seurat object with additional metadata. +#' @importFrom Seurat AddMetaData +#' @export +addMetaDataSafe <- function(obj, metadata, col.name, overwrite = FALSE, verbose = F, + strict = TRUE) { + + if (verbose) message("Running addMetaDataSafe...") + # browser() + stopifnot( + is(obj, "Seurat"), is.vector(metadata), is.character(col.name), is.logical(overwrite), + "Column already exists" = ((!col.name %in% colnames(obj@meta.data)) | overwrite) + ) + equal_length <- length(metadata) == ncol(obj) + iprint('strict', strict) + if(strict) stopifnot("Metadata or object too short" = equal_length) + + + if (!is.null(names(metadata))) { + if(verbose) print(head(names(metadata))) + if(verbose) print(head(colnames(obj))) + if(strict) stopifnot(names(metadata) == colnames(obj)) + } else { + message("No CBCs associated with new metadata. Assuming exact match.") + if (!equal_length) stop("Not equal lenght, no CBCs") + names(metadata) <- colnames(obj) + } + + # Perform the operation + obj <- Seurat::AddMetaData(object = obj, metadata = metadata, col.name = col.name) + + prefix <- paste("New column", col.name) + # Check for NA or NaN values + if (all(is.na(metadata) | is.nan(metadata))) { + warning(paste(prefix, "contains only NA or NaN values.")) + } else if (any(is.na(metadata) | is.nan(metadata))) { + message(paste(prefix, "contains NA or NaN values.")) + } + + return(obj) +} + + + +# _________________________________________________________________________________________________ +#' @title Create a Metadata Vector +#' +#' @description This function creates a metadata vector from an input vector and a Seurat object. +#' The resulting vector contains values from 'vec' for the intersecting cell names between 'vec' and 'obj'. +#' It also checks if the intersection between the cell names in 'vec' and 'obj' is more than a +#' minimum intersection size. +#' @param vec A named vector where the names represent cell IDs. This vector should have partial +#' overlap with the cells in a Seurat object. +#' @param obj A Seurat object that contains cell IDs which partially overlap with 'vec'. +#' @param fill The value to fill for non-intersecting cell names in 'obj'. Default is NA. +#' @param min.intersect The minimum number of cells to find in both 'vec' and 'obj'. +#' The function will stop if the intersection is less than this number. Default is 100. +#' @return A named vector of length equal to the number of cells in 'obj', with names from 'obj' and +#' values from 'vec' for intersecting cell names. +#' @examples +#' \dontrun{ +#' create.metadata.vector(vec = my_vector, obj = my_seurat_object, min.intersect = 50) +#' } +#' @export +create.metadata.vector <- function(vec, obj = combined.obj, fill = NA, + min.intersect = min(length(vec), ncol(obj), 100)) { + + stopifnot(is.vector(vec), is(obj, "Seurat") ) + cells_in_obj <- colnames(obj) + cells_in_vec <- names(vec) + stopifnot(is.character(cells_in_obj), is.character(cells_in_vec)) + + cells_in_both <- length(intersect(cells_in_vec, cells_in_obj)) + if(cells_in_both < min.intersect) { + message(ncol(obj), " cells in obj: ", kppc(cells_in_obj[1:3]) ,"\n", + length(vec), " cells in vec: ", kppc(cells_in_vec[1:3]) ,"\n", + cells_in_both, " cells in both.\n" + ) + stop("Intersection between vec and obj is less than min.intersect.") + } + + + cells.vec <- names(vec) + cells.obj <- colnames(obj) + cells.in.both <- intersect(cells.vec, cells.obj) + + iprint( + length(cells.in.both), "cells in both;", + length(cells.vec), "cells in vec;", + length(cells.obj), "cells in obj", + "intersect, e.g.:", head(cells.in.both, 5) + ) + + new_assignment <- CodeAndRoll2::vec.fromNames(cells.obj, fill = fill) + new_assignment[cells.in.both] <- vec[cells.in.both] + return(new_assignment) +} + + +# _________________________________________________________________________________________________ +#' @title addMetaFraction +#' +#' @description Add a new metadata column to a Seurat object, representing the fraction of a gene set in the transcriptome (expressed as a percentage). +#' @param obj Seurat object to which the new metadata column will be added. Default: ls.Seurat[[1]] +#' @param col.name Name of the new metadata column to be added. Default: 'percent.mito' +#' @param gene.symbol.pattern Regular expression pattern to match gene symbols. Default: c("^MT\\.|^MT-", FALSE)[1] +#' @param assay Name of the assay to be used. Default: 'RNA' +#' @param layer Name of the layer to be used. Default: 'data' +#' @param gene.set A set of gene symbols. If specified, it will be used instead of gene.symbol.pattern. Default: `FALSE`. +#' @param verbose Logical indicating whether to display detailed messages (TRUE) or not (FALSE). Default: `TRUE`. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' ls.Seurat[[1]] <- addMetaFraction(col.name = "percent.mito", gene.symbol.pattern = "^MT\\.|^MT-", obj = ls.Seurat[[1]]) +#' ls.Seurat[[1]] <- addMetaFraction(col.name = "percent.ribo", gene.symbol.pattern = "^RPL|^RPS", obj = ls.Seurat[[1]]) +#' ls.Seurat[[1]] <- addMetaFraction(col.name = "percent.AC.GenBank", gene.symbol.pattern = "^AC[0-9]{6}\\.", obj = ls.Seurat[[1]]) +#' ls.Seurat[[1]] <- addMetaFraction(col.name = "percent.AL.EMBL", gene.symbol.pattern = "^AL[0-9]{6}\\.", obj = ls.Seurat[[1]]) +#' ls.Seurat[[1]] <- addMetaFraction(col.name = "percent.LINC", gene.symbol.pattern = "^LINC0", obj = ls.Seurat[[1]]) +#' ls.Seurat[[1]] <- addMetaFraction(col.name = "percent.MALAT1", gene.symbol.pattern = "^MALAT1", obj = ls.Seurat[[1]]) +#' colnames(ls.Seurat[[1]]@meta.data) +#' HGA_MarkerGenes <- c( +#' "ENO1", "IGFBP2", "WSB1", "DDIT4", "PGK1", "BNIP3", "FAM162A", "TPI1", +#' "VEGFA", "PDK1", "PGAM1", "IER2", "FOS", "BTG1", "EPB41L4A-AS1", "NPAS4", "HK2", "BNIP3L", +#' "JUN", "ENO2", "GAPDH", "ANKRD37", "ALDOA", "GADD45G", "TXNIP" +#' ) +#' sobj <- addMetaFraction(col.name = "percent.HGA", gene.set = HGA_MarkerGenes, obj = sobj) +#' } +#' } +#' @seealso +#' \code{\link[Matrix]{colSums}} +#' @export +#' @importFrom Matrix colSums +#' @importFrom CodeAndRoll2 grepv +addMetaFraction <- function( + obj, + col.name = "percent.mito", + gene.symbol.pattern = c("^MT\\.|^MT-", FALSE)[1], + assay = "RNA", + layer = "data", + gene.set = FALSE, + verbose = TRUE) { + message("Should rather use the default `Seurat::PercentageFeatureSet`") + message("Assay: ", assay) + message("Layer: ", layer) + + # browser() + stopif(condition = isFALSE(gene.set) && isFALSE(gene.symbol.pattern), "Either gene.set OR gene.symbol.pattern has to be defined (!= FALSE).") + if (!isFALSE(gene.set) && !isFALSE(gene.symbol.pattern) && verbose) print("Both gene.set AND gene.symbol.pattern are defined. Only using gene.set.") + + if (!isFALSE(gene.set)) geneset <- check.genes(list.of.genes = gene.set, obj = obj) + total_expr <- Matrix::colSums(GetAssayData(object = obj)) + all.genes <- Features(obj, assay = assay) + + genes.matching <- if (!isFALSE(gene.set)) { + intersect(gene.set, all.genes) + } else { + CodeAndRoll2::grepv(pattern = gene.symbol.pattern, x = all.genes) + } + + genes.expr <- GetAssayData(object = obj, assay = assay, layer = layer)[genes.matching, ] + target_expr <- if (length(genes.matching) > 1) Matrix::colSums(genes.expr) else genes.expr + + iprint(length(genes.matching), "genes found, eg:", sample(genes.matching, 10)) + + obj <- AddMetaData(object = obj, metadata = target_expr / total_expr, col.name = col.name) + colnames(obj@meta.data) + return(obj) +} + + + +# _________________________________________________________________________________________________ +#' @title Add Meta Data for Gene-Class Fractions +#' +#' @description +#' This function adds meta data for various gene-class fractions such as percent.mito, percent.ribo, +#' percent.AC.GenBank, percent.AL.EMBL, percent.LINC, percent.MALAT1, and percent.HGA to a Seurat object. +#' If the meta data already exists, a message will be displayed. +#' +#' @param obj A Seurat object to be updated. Default: None. +#' @param gene_fractions A named list containing gene symbol patterns for each meta column name. +#' Default: List of predefined gene fractions. +#' @param add_hga A logical value indicating whether to add percent.HGA meta data. Default: `TRUE`.. +#' +#' @return An updated Seurat object. +#' @export +#' +#' @importFrom SeuratObject UpdateSeuratObject +#' +addGeneClassFractions <- function(obj, + gene_fractions = list( + "percent.mito" = "^MT\\.|^MT-", + "percent.ribo" = "^RPL|^RPS", + "percent.AC.GenBank" = "^AC[0-9]{6}\\.", + "percent.AL.EMBL" = "^AL[0-9]{6}\\.", + "percent.LINC" = "^LINC0", + "percent.MALAT1" = "^MALAT1" + ), + add_hga = TRUE) { + message("Adding meta data for gene-class fractions, e.g., percent.mito, etc.") + + for (col_name in names(gene_fractions)) { + if (!metaColnameExists(col_name = col_name, obj = obj)) { + gene_data <- gene_fractions[[col_name]] + message("Adding ", col_name, "to @meta.data...", gene_data) + obj <- addMetaFraction(col.name = col_name, gene.symbol.pattern = gene_data, obj = obj, + assay = "RNA") + } else { + message(paste(col_name, "already present.")) + } + } + + if (add_hga) { + message("Adding percent.HGA to @meta.data...") + HGA_MarkerGenes <- c( + "ENO1", "IGFBP2", "WSB1", "DDIT4", "PGK1", "BNIP3", "FAM162A", "TPI1", + "VEGFA", "PDK1", "PGAM1", "IER2", "FOS", "BTG1", "EPB41L4A-AS1", "NPAS4", "HK2", "BNIP3L", + "JUN", "ENO2", "GAPDH", "ANKRD37", "ALDOA", "GADD45G", "TXNIP" + ) + + if (!metaColnameExists(col_name = "percent.HGA", obj = obj)) { + obj <- addMetaFraction(col.name = "percent.HGA", gene.set = HGA_MarkerGenes, obj = obj) + } else { + message("percent.HGA already present.") + } + } + + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title add.meta.tags +#' +#' @description Add metadata tags to a Seurat object dataset. +#' @param list.of.tags A list of tags to be added as metadata. Default: tags +#' @param obj A Seurat object to which the metadata tags are to be added. Default: ls.Seurat[[1]] +#' @param n The index specifying the dataset for which the tags should be applied. Default: 1 +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' ls.Seurat[[1]] <- add.meta.tags(list.of.tags = tags, obj = ls.Seurat[[1]], n = 1) +#' } +#' } +#' @export +add.meta.tags <- function(list.of.tags = tags, obj = ls.Seurat[[1]], n = 1) { # N is the for which dataset + stopifnot(length(names(tags)) == length(tags)) + nCells <- nrow(obj@meta.data) + for (i in 1:length(list.of.tags)) { + tagX <- list.of.tags[[i]] + new.meta.tag.per.cell <- rep(tagX[n], nCells) + obj <- AddMetaData(object = obj, metadata = new.meta.tag.per.cell, col.name = names(tags)[i]) + } + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title seu.add.meta.from.table +#' +#' @description Add multiple new metadata columns to a Seurat object from a table. # +#' @param obj Seurat object, Default: seu.ORC +#' @param meta Metadata data frame. +#' @param suffix A suffix added to the filename, Default: '.fromMeta' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' combined.obj <- seu.add.meta.from.table() +#' } +#' } +#' @export +seu.add.meta.from.table <- function(obj = combined.obj, meta, suffix = ".fromMeta") { # Add multiple new metadata columns to a Seurat object from a table. + NotFound <- setdiff(colnames(obj), rownames(meta)) + Found <- intersect(colnames(obj), rownames(meta)) + if (length(NotFound)) iprint(length(NotFound), "cells were not found in meta, e.g.: ", trail(NotFound, N = 10)) + + mCols.new <- colnames(meta) + mCols.old <- colnames(obj@meta.data) + overlap <- intersect(mCols.new, mCols.old) + if (length(overlap)) { + iprint(length(overlap), "metadata columns already exist in the seurat object: ", overlap, ". These are tagged as: *", suffix) + colnames(meta)[overlap] <- paste0(overlap, suffix) + } + mCols.add <- colnames(meta) + obj@meta.data[Found, mCols.add] <- meta[Found, ] + + return(obj) +} + +# _________________________________________________________________________________________________ +#' @title seu.map.and.add.new.ident.to.meta +#' +#' @description Adds a new metadata column to a Seurat object based on an identity mapping table. +#' @param obj The Seurat object to which the new metadata column will be added. Default: combined.obj. +#' @param ident.table A data frame or matrix with identity mapping data. This parameter is used to map the old identities to the new ones. Default: clusterIDs.GO.process. +#' @param orig.ident The original identities of the Seurat object. Default: Idents(obj). +#' @param metaD.colname A string specifying the name of the new metadata column. The default value is the name of the provided ident.table. +#' @return A Seurat object with the new metadata column added. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Example usage: +#' combined.obj <- seu.map.and.add.new.ident.to.meta( +#' obj = combined.obj, +#' ident.table = clusterIDs.GO.process +#' ) +#' } +#' } +#' @export +seu.map.and.add.new.ident.to.meta <- function( + obj = combined.obj, ident.table = clusterIDs.GO.process, + orig.ident = Idents(obj), + metaD.colname = substitute(ident.table)) { + # identities should match + { + Idents(obj) <- orig.ident + ident.vec <- df.col.2.named.vector(ident.table) + ident.X <- names(ident.vec) + ident.Y <- as.character(ident.vec) + ident.Seu <- gtools::mixedsort(levels(Idents(obj))) + iprint("ident.Seu: ", ident.Seu) + + OnlyInIdentVec <- setdiff(ident.X, ident.Seu) + OnlyInSeuratIdents <- setdiff(ident.Seu, ident.X) + + msg.IdentVec <- kollapse("Rownames of 'ident.table' have entries not found in 'Idents(obj)':", + OnlyInIdentVec, " not found in ", ident.Seu, + collapseby = " " + ) + + msg.Seu <- kollapse("Rownames of 'Idents(obj)' have entries not found in 'ident.table':", + OnlyInSeuratIdents, " not found in ", ident.X, + collapseby = " " + ) + + stopif(length(OnlyInIdentVec), message = msg.IdentVec) + stopif(length(OnlyInSeuratIdents), message = msg.Seu) + } + # identity mapping + { + new.ident <- CodeAndRoll2::translate(vec = as.character(Idents(obj)), old = ident.X, new = ident.Y) + obj@meta.data[[metaD.colname]] <- new.ident + iprint(metaD.colname, "contains the named identitites. Use Idents(combined.obj) = '...'. The names are:") + cat(paste0("\t", ident.Y, "\n")) + } +} + + + + +# _________________________________________________________________________________________________ +# Replace / overwrite / remove metadata ______________________________ ---- +# _________________________________________________________________________________________________ + + +# _________________________________________________________________________________________________ +#' @title fix.orig.ident +#' +#' @description Remove the string "filtered_feature_bc_matrix." from "orig.ident". Helper function. +#' @param obj Seurat object, Default: merged.obj +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' merged.obj$orig.ident <- fix.orig.ident(obj = merged.obj) +#' table(merged.obj$orig.ident) +#' } +#' } +#' @export +fix.orig.ident <- function(obj = merged.obj) { + fixed <- sub(obj$"orig.ident", pattern = "filtered_feature_bc_matrix.", replacement = "") + return(fixed) +} + + + +# _________________________________________________________________________________________________ +#' @title seu.RemoveMetadata +#' +#' @description Remove specified metadata columns from a Seurat object. +#' @param obj A Seurat object from which metadata columns will be removed. Default: combined.obj +#' @param cols_remove A character vector specifying metadata column names to remove. By default, it will remove all columns that do not start with "integr" or "cl.names". +#' @return A Seurat object with specified metadata columns removed. +#' @export +#' @examples +#' \dontrun{ +#' combined.obj <- seu.RemoveMetadata(obj = combined.obj, cols_remove = c("column1", "column2")) +#' } +seu.RemoveMetadata <- function( + obj = combined.obj, + cols_remove = grepv(colnames(obj@meta.data), pattern = "^integr|^cl.names", perl = TRUE)) { + CNN <- colnames(obj@meta.data) + iprint("cols_remove:", cols_remove) + print("") + (cols_keep <- setdiff(CNN, cols_remove)) + obj@meta.data <- obj@meta.data[, cols_keep] + iprint("meta.data colnames kept:", colnames(obj@meta.data)) + + return(obj) +} + + +# _________________________________________________________________________________________________ +# Export or Transfer metadata ______________________________ ---- +# _________________________________________________________________________________________________ + + +#' @title Save Metadata from a List of Seurat Objects +#' +#' @description This function takes a list of Seurat objects, extracts their metadata, and saves it to a file with a specified suffix. +#' +#' @param ls.obj A list of Seurat objects. +#' @param suffix A character string to append to the filename when saving metadata. +#' @return Invisible list of metadata frames +#' @export +saveLsSeuratMetadata <- function(ls.obj, suffix) { + stopifnot(is.list(ls.obj)) # Check if input is a list + message(length(ls.obj), " objects") + ls.meta <- setNames(lapply(ls.obj, function(x) x@meta.data), names(ls.obj)) + + ncolz <- unique(sapply(ls.meta, ncol)) + message(ncolz, " columns in meta.data") + if (length(ncolz) > 1) warning("Different column counts across meta.data!", immediate. = TRUE) + xsave(ls.meta, suffix = suffix) + invisible(ls.meta) +} + + +# _________________________________________________________________________________________________ +#' @title Transfer Multiple Metadata Columns Between Two Seurat Objects +#' +#' @description Transfers specified metadata columns from one Seurat object to another, +#' with options for verbose output and overwriting existing columns. Checks for cell overlap and +#' reports percentages of matching and unique cells. +#' +#' @param from The source Seurat object from which metadata will be transferred. +#' @param to The destination Seurat object to which metadata will be added. +#' @param colname_from Vector of names for the columns in the source object's metadata to transfer. +#' @param colname_to Vector of names for the columns in the destination object's metadata. +#' Defaults to the same names as `colname_from`. Must be the same length as `colname_from` unless +#' it is the same as `colname_from`. +#' @param overwrite Logical, indicating whether to overwrite the column in the destination object +#' if it already exists. Defaults to FALSE. +#' @param plotUMAP Logical, indicating whether to plot UMAPs of the destination object with +#' the new identity. +#' @param strict Logical, indicating whether to fail if the destination object have cells not found in the source object. +#' @param verbose Logical, indicating whether to print details about the transfer, including the +#' number and percentage of matching cells between objects, and unique cells in each object. +#' @param ... Additional arguments to be passed to `transferMetadata`. +#' +#' @return Returns the destination Seurat object (`to`) with the new metadata columns added. +#' +#' @examples +#' # Assuming `object1` and `object2` are Seurat objects, and you want to transfer +#' # metadata columns named 'patientID' and 'treatmentGroup' from `object1` to `object2`: +#' object2 <- transferMetadata( +#' from = object1, to = object2, +#' colname_from = c("patientID", "treatmentGroup") +#' ) +#' +#' @details This function is useful for merging related data from separate Seurat objects, +#' ensuring that relevant metadata is consistent across datasets. The function checks for +#' the existence of the specified columns in the source object and whether the columns +#' can be overwritten in the destination object. It also reports on cell overlap between +#' the two objects, which can be useful for understanding the relationship between datasets. +#' +#' @export +transferMetadata <- function(from, to, + colname_from, + colname_to = colname_from, + overwrite = FALSE, + plotUMAP = TRUE, + strict = TRUE, + verbose = TRUE, + ...) { + # + stopifnot( + is(from, "Seurat"), is(to, "Seurat"), + is.character(colname_from), is.character(colname_to), + all(colname_from %in% colnames(from@meta.data)), + "Length of 'colname_from' and 'colname_to' must be equal" = + length(colname_from) == length(colname_to) + ) + + # Check cell overlaps + cells_in_both <- intersect(colnames(from), colnames(to)) + cells_only_in_from <- setdiff(colnames(from), colnames(to)) + cells_only_in_to <- setdiff(colnames(to), colnames(from)) + nr.cells.both <- length(cells_in_both) + nr.cells.only.from <- length(cells_only_in_from) + nr.cells.only.to <- length(cells_only_in_to) + + + + # Print cell overlap information _______________________________________________________ + if (verbose) { + cat( + "Cells matching between objects:", nr.cells.both, + "(", sprintf("%.2f%%", nr.cells.both / length(colnames(from)) * 100), "of from and", + sprintf("%.2f%%", nr.cells.both / length(colnames(to)) * 100), "of to)\n" + ) + cat( + "Cells only in obj1 (from):", length(cells_only_in_from), + "(", sprintf("%.2f%%", nr.cells.only.from / length(colnames(from)) * 100), ")\n" + ) + cat( + "Cells only in obj2 (to):", nr.cells.only.to, + "(", sprintf("%.2f%%", nr.cells.only.to / length(colnames(to)) * 100), ")\n" + ) + } + + if(strict) { + stopifnot("There are cells ONLY present in the destination object. Cannot transfer metadata." = (nr.cells.only.to == 0) ) + } else { + warning("There are cells ONLY present in the destination object. Filled with NA", immediate. = TRUE) + } + + if(nr.cells.only.from > 0 & verbose) warning("There are cells ONLY present in the FROM object. These will be ignored.", immediate. = TRUE) + + + # Transfer metadata columns _______________________________________________________ + for (i in seq_along(colname_from)) { + + # Check if to-column exists in destination object OR you overwrite anyway + if (!(colname_to[i] %in% colnames(to@meta.data)) || overwrite) { + + # Check if column exists in source object + if (colname_from[i] %in% colnames(from@meta.data)) { + + # Transfer the metadata column + # to[[colname_to[i]]] <- from[[colname_from[i]]] + + metadata_from <- getMetadataColumn(obj = from, col = colname_from[i]) + to <- addMetaDataSafe(obj = to, col.name = colname_to[i], metadata = metadata_from[colnames(to)], + strict = strict) + + message(sprintf("Transferred '%s' to '%s'.", colname_from[i], colname_to[i])) + } else { + warning(sprintf("Column '%s' not found in source object.", colname_from[i]), immediate. = TRUE) + } + } else { + warning(sprintf( + "Column '%s' already exists in destination object. Set 'overwrite = TRUE' to overwrite.", + colname_to[i] + ), immediate. = TRUE) + } + + # Plot umap _______________________________________________________ + if (plotUMAP) { + x <- clUMAP(obj = to, ident = colname_to[i], suffix = "transferred.ident", ...) + print(x) + } + + } # for + + return(to) +} + + + +# _________________________________________________________________________________________________ +# Subset metadata ______________________________ ---- +# _________________________________________________________________________________________________ + +# _________________________________________________________________________________________________ +#' @title Sample N % of a dataframe (obj@metadata), and return rownames (cell IDs). +#' +#' @description This function samples a specified percentage of a dataframe (specifically a subset +#' of the metadata of a Seurat object) and returns the corresponding cell IDs. +#' @param metaDF A dataframe representing a subset of the metadata of a Seurat object. Default: +#' Subset of 'MetaData' for which 'Pass' is TRUE. +#' @param pc The percentage of the dataframe to sample, expressed as a decimal. Default: 0.1. +#' @return A vector of sampled cell IDs. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Example usage: +#' # Suppose 'MetaData' is a dataframe and 'Pass' is a boolean vector with the same length. +#' # The following example will sample 10% of the rows of 'MetaData' where 'Pass' is TRUE. +#' sampleNpc(metaDF = MetaData[which(Pass), ], pc = 0.1) +#' } +#' } +#' @export +sampleNpc <- function(metaDF = MetaData[which(Pass), ], pc = 0.1) { + cellIDs <- rownames(metaDF) + nr_cells <- floor(length(cellIDs) * pc) + cellIDs.keep <- sample(cellIDs, size = nr_cells, replace = FALSE) + return(cellIDs.keep) +} + + + +# _________________________________________________________________________________________________ +# Combine metadata ______________________________ ---- +# _________________________________________________________________________________________________ + +#' @title Merge Seurat Metadata +#' @description Merges the `@metadata` from a list of Seurat objects, binds them by row, and applies optional inclusion/exclusion of columns. +#' @param ls_obj A list of Seurat objects. +#' @param include_cols A character vector of column names to include (default NULL for all columns). +#' @param exclude_cols A character vector of column names to exclude (default NULL for no exclusions). +#' @return A merged dataframe of metadata from all Seurat objects. +#' @importFrom dplyr bind_rows select +#' @export +merge_seurat_metadata <- function(ls_obj, include_cols = NULL, exclude_cols = NULL) { + + # Assert that ls_obj is a list and all elements are Seurat objects + stopifnot(is.list(ls_obj), length(ls_obj) > 0, + all(sapply(ls_obj, function(x) inherits(x, "Seurat")))) # Ensure all are Seurat objects + + # Extract metadata from each Seurat object + metadata_list <- lapply(ls_obj, function(seurat_obj) seurat_obj@meta.data) + + # Assert that the number of columns are the same in all metadata + col_counts <- sapply(metadata_list, ncol) + stopifnot(length(unique(col_counts)) == 1) # Ensure all metadata have the same number of columns + + # Assert that rownames (cell names) are unique across all metadata + cell_names <- unlist(lapply(metadata_list, rownames)) + stopifnot(length(unique(cell_names)) == length(cell_names)) # Ensure all cell names are unique + + # Optionally select columns to include or exclude + if (!is.null(include_cols)) { + metadata_list <- lapply(metadata_list, function(md) dplyr::select(md, all_of(include_cols))) + } else if (!is.null(exclude_cols)) { + metadata_list <- lapply(metadata_list, function(md) dplyr::select(md, -all_of(exclude_cols))) + } + + # Bind the metadata by row + merged_metadata <- dplyr::bind_rows(metadata_list) + + # Message all column names to console + message("Columns: ", kppc(colnames(merged_metadata)), "\n") + message(length(merged_metadata), " merged columns and ", nrow(merged_metadata), " cells from ", length(ls_obj), " objects.") + + return(merged_metadata) +} + +# Example usage: +# merged_metadata <- merge_seurat_metadata(ls_obj, include_cols = c("nFeature_RNA", "nCount_RNA")) + + + +#' @title Combine Metadata from a list of Seurat objects and Write to TSV +#' +#' @description +#' Formerly `writeMetadataToTsv`. `writeCombinedMetadataToTsvFromLsObj` takes a list of ls.Obj, extracts their `@meta.data` slots, +#' removes specified columns, checks for column consistency, creates a barplot showing the number +#' of rows per object, and finally merges these into one large data frame. +#' +#' @param ls.Obj A list of objects, each containing a `@meta.data` slot. +#' @param cols.remove A character vector of column names to be removed from each metadata data frame. +#' Default is an empty character vector, meaning no columns will be removed. +#' @param save_as_qs A logical indicating whether to save the merged metadata as a .qs object. +#' @param save_as_tsv A logical indicating whether to save the merged metadata as a .tsv file. +#' @param ... Additional arguments to be passed to `write.table`. +#' +#' @details +#' The function starts by validating the input to ensure it's a list. It then extracts the `@meta.data` +#' from each object, removing the specified columns. It checks if all data frames have the same columns +#' and issues a warning if not. A barplot is created to visualize the number of rows (cells) per object. +#' Finally, it merges all the metadata into one large data frame and prints its dimensions. +#' +#' @return A large data frame that is the row-wise merge of all `@meta.data` data frames. +#' +#' @examples +#' # Assuming a list of Seurat objects with meta.data +#' mergedMetaData <- writeMetadataToTsv(seuratObjectsList, cols.remove = c("column1", "column2")) +#' +#' @note +#' This function is intended for use with S4 objects that have a `@meta.data` slot. +#' The function currently contains a `browser()` call for debugging purposes, which should be removed in production. +#' +#' @export +writeCombinedMetadataToTsvFromLsObj <- function(ls.Obj, cols.remove = character(), + save_as_qs = TRUE, save_as_tsv = TRUE, ...) { + warning("writeMetadataToTsv is EXPERIMENTAL. It writes out subset of columns", immediate. = TRUE) + stopifnot(is.list(ls.Obj)) # Validate that input is a list + + # Extract metadata from each object and remove specified columns + metadataList <- lapply(ls.Obj, function(obj) { + stopifnot("meta.data" %in% slotNames(obj)) # Check for meta.data slot + metaData <- obj@meta.data + metaData[, !(names(metaData) %in% cols.remove)] + }) + + # Find common columns and subset + commonCols <- CodeAndRoll2::intersect.ls(lapply(metadataList, names)) + metadataList <- lapply(metadataList, function(df) df[, commonCols, drop = FALSE]) + + + # Check if qbarplot is available and create a barplot showing the number of rows per object + metadata.cells.per.obj <- sapply(metadataList, nrow) + print(metadata.cells.per.obj) + pobj <- ggExpress::qbarplot(metadata.cells.per.obj, + label = metadata.cells.per.obj, ylab = "cells", + save = FALSE + ) + print(pobj) + + # Merge metadata into one big data frame + mergedMetaData <- do.call(rbind, metadataList) + + # Print dimensions of the merged data frame + print(dim(mergedMetaData)) + + if (save_as_qs) xsave(mergedMetaData) + if (save_as_tsv) ReadWriter::write.simple.tsv(mergedMetaData, ...) + + # Return the merged data frame + invisible(mergedMetaData) +} + + + +# _________________________________________________________________________________________________ +# Plot metadata ______________________________ ---- +# _________________________________________________________________________________________________ +#' @title Plot Metadata Correlation Heatmap +#' +#' @description This function plots a heatmap of metadata correlation values. It accepts a Seurat object +#' and a set of metadata columns to correlate. The correlations are calculated using either Pearson +#' or Spearman methods, and the resulting heatmap can include the principal component (PCA) values +#' and be saved with a specific suffix. +#' +#' @param columns A vector of metadata column names to calculate correlations. +#' Default: c("nCount_RNA", "nFeature_RNA", "percent.mito", "percent.ribo"). +#' @param obj The main Seurat object used for calculations. No default value. +#' @param cormethod The method to calculate correlations. Can be either "pearson" or "spearman". Default: "pearson". +#' @param main The main title for the plot. Default: "Metadata correlations" followed by the correlation method. +#' @param show_numbers Logical, determines if correlation values should be displayed on the plot. Default: `FALSE`.. +#' @param digits The number of decimal places for displayed correlation values. Default: 1. +#' @param suffix A suffix added to the output filename. Default: NULL. +#' @param add_PCA Logical, determines if PCA values should be included in the correlation calculation. Default: `TRUE`.. +#' @param n_PCs The number of PCA components to be included if 'add_PCA' is TRUE. Default: 8. +#' @param w The width of the plot in inches. Default: ceiling((length(columns)+n_PCs)/2). +#' @param h The height of the plot in inches. Default: the value of w. +#' @param use_ggcorrplot Logical, determines if the ggcorrplot package should be used for plotting. Default: `FALSE`.. +#' @param n_cutree The number of clusters to be used in hierarchical clustering. Default: the number of PCs. +#' @param ... Additional parameters passed to the internally called ggcorrplot function. +#' +#' @seealso +#' \code{\link[ggcorrplot]{ggcorrplot}} +#' @importFrom ggcorrplot ggcorrplot +#' @importFrom pheatmap pheatmap +#' @export +plotMetadataCorHeatmap <- function( + columns = c("nCount_RNA", "nFeature_RNA", "percent.mito", "percent.ribo"), + obj, + cormethod = c("pearson", "spearman")[1], + main = paste("Metadata", cormethod, "correlations"), + show_numbers = FALSE, + digits = 1, + suffix = NULL, + add_PCA = TRUE, + n_PCs = 8, + w = ceiling((length(columns) + n_PCs) / 2), h = w, + use_ggcorrplot = FALSE, + n_cutree = (n_PCs), + ...) { + meta.data <- obj@meta.data + columns.found <- intersect(colnames(meta.data), columns) + columns.not.found <- setdiff(columns, colnames(meta.data)) + if (length(columns.not.found)) iprint("columns.not.found:", columns.not.found) + + meta.data <- meta.data[, columns.found] + + if (add_PCA) { + stopif(is.null(obj@reductions$"pca"), "PCA not found in @reductions.") + main <- paste("Metadata and PC", cormethod, "correlations") + suffix <- FixPlotName(suffix, "w.PCA") + + PCs <- obj@reductions$pca@cell.embeddings + stopifnot(nrow(meta.data) == nrow(PCs)) + meta.data <- cbind(PCs[, 1:n_PCs], meta.data) + } + + corX <- cor(meta.data, method = cormethod) + if (use_ggcorrplot) { + pl <- ggcorrplot::ggcorrplot(corX, + title = main, + hc.order = TRUE, + digits = digits, + lab = show_numbers, + type = "full", + ... + ) + ggExpress::qqSave(pl, fname = FixPlotName(make.names(main), suffix, "pdf"), w = w, h = h) + } else { + pl <- pheatmap::pheatmap(corX, + main = main, treeheight_row = 2, treeheight_col = 2, + cutree_rows = n_cutree, cutree_cols = n_cutree + ) + wplot_save_pheatmap( + x = pl, width = w, + plotname = FixPlotName(make.names(main), suffix, "pdf") + ) + } + pl +} + + +# _________________________________________________________________________________________________ +#' @title Calculate and plot heatmap of cluster medians +#' +#' @description This function calculates the median of specified variables in a dataframe, +#' grouped by a column ('ident'). The function also provides an option to scale the medians, +#' subset the ident levels, and either return a matrix of median values or plot a heatmap. +#' +#' @param meta A dataframe containing metadata from a Seurat object. +#' @param ident A character string representing the column name to be used for grouping the data. +#' @param subset_ident_levels An optional vector of ident levels to subset. Default is FALSE. +#' @param variables A character vector containing the names of columns for which to calculate the median. +#' @param scale A logical indicating whether to scale the medians. Default is TRUE. +#' @param suffix A character string added to the plot file name if not returning a matrix. Default is NULL. +#' @param return_matrix A logical indicating whether to return a matrix of medians, or plot a heatmap. Default is FALSE. +#' @param plotname A character string representing the main title for the plot. Default is "Median metadata values by cluster". +#' @param n_cut_row The number of row rows to cut the tree into on the `pheatmap`. Default is NA (none). +#' @param n_cut_col The number of column clusters to cut the tree into on the `pheatmap`. Default is NA (none) +#' @param w The width of the plot (if not returning a matrix). Default is half the number of variables. +#' @param ... Additional parameters passed to the pheatmap function. +#' +#' @return If 'return_matrix' is TRUE, a matrix where rows correspond to the unique values of 'ident', +#' and columns correspond to 'variables'. Each element of the matrix represents the median of a specific +#' variable for a specific group. If 'return_matrix' is FALSE, it saves a heatmap plot and returns the plot object. +#' +#' @importFrom dplyr group_by summarize_at +#' @importFrom ReadWriter FirstCol2RowNames +#' @importFrom pheatmap pheatmap +#' @import tidyverse +#' @export +heatmap_calc_clust_median <- function( + meta, ident, subset_ident_levels = FALSE, + variables, scale = TRUE, + suffix = NULL, + return_matrix = FALSE, + plotname = "Median metadata values by cluster", + n_cut_row = NA, + n_cut_col = NA, + w = ceiling(length(variables) / 2), + ...) { + # Ensure that 'meta' is a dataframe, 'ident' is a column in 'meta', and 'variables' are columns in 'meta' + stopifnot(is.data.frame(meta), + is.character(ident), + is.character(variables), + ident %in% colnames(meta), + all(variables %in% colnames(meta)) + ) + + # Group by 'ident' and calculate median for each variable + df_cluster_medians <- meta |> + group_by(meta[[ident]]) |> + summarize_at(vars(variables), median, na.rm = TRUE) + df_cluster_medians <- ReadWriter::column.2.row.names(df_cluster_medians) + + if (!isFALSE(subset_ident_levels)) { + stopifnot(all(subset_ident_levels %in% rownames(df_cluster_medians))) + suffix <- FixPlotName(suffix, "subset") + df_cluster_medians <- df_cluster_medians[subset_ident_levels, ] + } + + if (scale) { + df_cluster_medians <- scale(df_cluster_medians) + suffix <- sppp(suffix, "scaled") + } + + if (return_matrix) { + return(df_cluster_medians) + } else { + plot_name <- FixPlotName(plotname, suffix) + pl <- pheatmap::pheatmap(df_cluster_medians, + main = plot_name, + cutree_rows = n_cut_row, + cutree_cols = n_cut_col, + ... + ) + wplot_save_pheatmap( + x = pl, width = w, + plotname = FixPlotName(make.names(plot_name), suffix, "pdf") + ) + + # Now plot correlation heatmap between the identites + corX <- cor(t(df_cluster_medians), method = "spearman") + pl <- pheatmap::pheatmap(corX, + main = paste0("Correlation between ", plot_name), + treeheight_row = 2, treeheight_col = 2, + # cluster_cols = F, cluster_rows = F, + cutree_rows = n_cut_row, cutree_cols = n_cut_col + ) + + wplot_save_pheatmap( + x = pl, width = w, + plotname = FixPlotName(make.names(plot_name), suffix, "correlation.pdf") + ) + + + } +} + + + +# _________________________________________________________________________________________________ +#' @title plotMetadataMedianFractionBarplot +#' +#' @description Generates a barplot of metadata median values. +#' @param columns A vector of column names to consider for the barplot. Default: c("percent.mito", "percent.ribo"). +#' @param suffix A suffix added to the output filename. Default: NULL. +#' @param group.by The variable to group by for calculations. Default: Second result of GetClusteringRuns(obj). +#' @param method Method used for calculations, either "median" or "mean". Default: "median". +#' @param min.thr Minimum threshold percentage for a cluster. Default: 2.5. +#' @param return.matrix Logical; if TRUE, returns a matrix. Default: `FALSE`.. +#' @param main Main title for the plot. Default: "read fractions per transcript class and cluster" followed by the method and suffix. +#' @param ylab Label for the y-axis. Default: "Fraction of transcriptome (%)". +#' @param percentify Logical. If TRUE, multiplies the fraction by 100. Default: `TRUE`.. +#' @param subt Subtitle for the plot. Default: NULL. +#' @param position Position adjustment for geoms. Default: position_stack(). +#' @param w The width of the plot. Default: 10. +#' @param h The height of the plot. Default: 6. +#' @param obj The main Seurat object used for calculations. Default: combined.obj. +#' @param ... Additional parameters passed to the internally called functions. +#' @seealso +#' \code{\link[dplyr]{summarise_all}} +#' \code{\link[reshape2]{melt}} +#' @export plotMetadataMedianFractionBarplot +#' @importFrom dplyr summarize_all +#' @importFrom reshape2 melt + +plotMetadataMedianFractionBarplot <- function( + columns = c("percent.mito", "percent.ribo"), + suffix = NULL, + group.by = GetClusteringRuns(obj = obj)[2], + method = c("median", "mean")[1], + min.thr = 2.5 # At least this many percent in at least 1 cluster + , return.matrix = FALSE, + main = paste(method, "read fractions per transcript class and cluster", suffix), + ylab = "Fraction of transcriptome (%)", + percentify = TRUE, + subt = NULL, + position = position_stack(), + w = 10, h = 6, + obj = combined.obj, + ...) { + meta.data <- obj@meta.data + stopifnot(group.by %in% colnames(meta.data)) + columns.found <- intersect(colnames(meta.data), c(group.by, columns)) + + (mat.cluster.medians1 <- meta.data[, columns.found] |> + group_by_at(group.by) |> + dplyr::summarize_all(median) + ) + if (min.thr > 0) { + pass.cols <- colMax(mat.cluster.medians1[, -1]) > (min.thr / 100) + cols.OK <- which_names(pass.cols) + cols.FAIL <- which_names(!pass.cols) + subt <- paste(length(cols.FAIL), "classed do not reach", min.thr, "% :", kpps(cols.FAIL)) + iprint(subt) + mat.cluster.medians1 <- mat.cluster.medians1[, c(group.by, cols.OK)] + } + + + mat.cluster.medians <- mat.cluster.medians1 |> + reshape2::melt(id.vars = c(group.by), value.name = "Fraction") + + + if (percentify) mat.cluster.medians$"Fraction" <- 100 * mat.cluster.medians$"Fraction" + + pl <- ggbarplot(mat.cluster.medians, + x = group.by, y = "Fraction", fill = "variable", + position = position, + title = main, subtitle = subt, ylab = ylab + ) + ggExpress::qqSave(pl, fname = ppp(make.names(main), "pdf"), w = w, h = h) + pl + if (return.matrix) mat.cluster.medians1 else pl +} + + + +# _________________________________________________________________________________________________ +#' @title Plot Metadata Category Pie Chart +#' +#' @description Generates a pie chart visualizing the distribution of categories within a specified +#' metadata column of a Seurat object. +#' +#' @param metacol The metadata column to visualize. +#' @param plot_name Name of the plot to generate. +#' @param obj Seurat object containing the metadata. Default: `combined.obj`. +#' @param max.categs The maximum number of categories to display in the pie chart. +#' If the number of categories exceeds this value, an error is thrown. +#' @param both_pc_and_value If `TRUE`, labels on the pie chart will show both the percentage +#' and the count of each category. If `FALSE`, only the percentage is shown. +#' @param subtitle Optional subtitle for the pie chart. +#' @param labels Optional labels for the pie chart. +#' +#' @param ... Additional arguments to pass to the pie chart plotting function. +#' +#' @examples +#' \dontrun{ +#' plotMetadataCategPie( +#' metacol = "Singlet.status", +#' plot_name = "Singlet Status Distribution", +#' obj = combined.obj, +#' max.categs = 20, +#' both_pc_and_value = TRUE +#' ) +#' } +#' +#' @return A pie chart visualizing the distribution of categories within the specified metadata column. +#' @export +plotMetadataCategPie <- function( + metacol = "Singlet.status", + plot_name = paste(metacol, "distribution"), + obj = combined.obj, + max.categs = 20, + both_pc_and_value = TRUE, + subtitle = NULL, + labels = NULL, + LegendSide = FALSE, + ...) { + # + categ_pivot <- table(obj[[metacol]]) + stopifnot(length(categ_pivot) < max.categs) + + qpie(categ_pivot, + plotname = FixPlotName(make.names(plot_name)), + both_pc_and_value = both_pc_and_value, + LegendSide = LegendSide, labels = labels, + LegendTitle = "", subtitle = subtitle, + ...) +} + + + +# _________________________________________________________________________________________________ +# Label / identity transfer across objects ______________________________ ---- +# _________________________________________________________________________________________________ + + + + +#' @title Rename Azimuth Columns in Seurat Object +#' +#' @description Dynamically renames specified metadata columns in a Seurat object, particularly those +#' prefixed with "predicted." and the "mapping.score" column, by applying a new prefix +#' that combines a user-defined prefix and a reference name. +#' +#' @param obj A Seurat object containing metadata in `meta.data` that needs column names to be renamed. +#' @param ref A character string specifying the reference; defaults to "humancortexref". +#' The "ref" part of the string will be removed in the new column names. +#' @param prefix A character string to prefix to the column names, defaulting to "azi". +#' This prefix is combined with the modified `ref` to form the new column names. +#' @param azim_cols Azimuth columns +#' @return Returns the Seurat object with renamed metadata columns. +#' +#' @examples +#' # Assuming `obj` is a Seurat object with metadata columns following the "predicted." pattern: +#' obj <- renameAzimuthColumns(obj, ref = "humancortexref", prefix = "azi") +#' # This will rename columns like "predicted.class" to "azi.humancortex.class" +#' # and include "mapping.score" as "azi.humancortex.mapping.score" +#' +#' @export +renameAzimuthColumns <- function(obj, ref = c("humancortexref", "fetusref")[1], + prefix = "azi", + azim_cols = CodeAndRoll2::grepv( + x = tail(colnames(obj@meta.data), 10), + pattern = "predicted." + )) { + stopifnot( + "obj must be a Seurat object" = is(obj, "Seurat"), + "azim_cols must be non-empty" = length(azim_cols) > 0 + ) + + ref <- sub(pattern = "ref", replacement = "", x = ref) + iprint(length(azim_cols), "azim_cols:", azim_cols) + + # Extract the column names of meta.data + meta_col_names <- colnames(obj@meta.data) + + # Loop through the azim_cols and replace the prefix if they exist in meta.data + for (azim_col in azim_cols) { + if (azim_col %in% meta_col_names) { + # Create the new column name by replacing "predicted." with the new prefix + new_col_name <- sub(pattern = "^predicted\\.", replacement = kpp(prefix, ref, ""), x = azim_col) + names(obj@meta.data)[names(obj@meta.data) == azim_col] <- new_col_name + } + } + + if ("mapping.score" %in% colnames(obj@meta.data)) { + names(obj@meta.data)[names(obj@meta.data) == "mapping.score"] <- kpp(prefix, ref, "mapping.score") + } + + print(tail(colnames(obj@meta.data), 10)) + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title Rename Small Categories in Seurat Object Metadata +#' +#' @description This function renames categories within a specified identity column of a +#' Seurat object's metadata that have fewer cells than a specified minimum threshold. +#' Categories below this threshold are renamed to a common name, typically "unclear", +#' to clean up small, potentially noisy categories. +#' @param obj A Seurat object containing the metadata with categories to be cleaned. +#' @param idents A character vector specifying the names of the identity columns within +#' `obj@meta.data` where categories are to be renamed. +#' @param min.cells An integer specifying the minimum number of cells a category must have +#' to retain its original name. Categories with fewer cells than this threshold will be +#' renamed. Defaults to the greater of the total number of columns divided by 2000 or 10. +#' @param new.name A character string specifying the new name to assign to small categories. +#' Defaults to "unclear". +#' +#' @return Returns the Seurat object with renamed categories in the specified metadata columns. +#' +#' @examples +#' # Assuming obj is a Seurat object with identity columns "ident1" and "ident2": +#' idents <- c("ident1", "ident2") +#' obj <- renameSmallCategories(obj, idents = idents) +#' +#' @export +renameSmallCategories <- function( + obj, + idents = c("predicted.class", "predicted.cluster", "predicted.subclass"), + min.cells = max(round((ncol(obj)) / 2000), 10), + new.name = "unclear") { + stopifnot("obj must be a Seurat object" = is(obj, "Seurat")) + + for (ident in idents) { + if (ident %in% colnames(obj@meta.data)) { + # Count the number of cells per category in the specified identity column + category_counts <- table(obj@meta.data[[ident]]) + + # Identify categories with fewer cells than min.cells + small_categories <- names(category_counts[category_counts < min.cells]) + + # Initial number of categories + initial_categories <- length(unique(obj@meta.data[[ident]])) + + # Rename the categories in the ident column that have fewer cells than min.cells to new.name + obj@meta.data[[ident]] <- ifelse(obj@meta.data[[ident]] %in% small_categories, new.name, obj@meta.data[[ident]]) + + # Report to console + cells_renamed <- sum(obj@meta.data[[ident]] == new.name) + categories_removed <- length(small_categories) + remaining_categories <- length(unique(obj@meta.data[[ident]])) + + message( + "For ident '", ident, "':\n", + cells_renamed, " cells were renamed.\n", + remaining_categories, " of initial ", initial_categories, " categories remained.\n\n", + "Removed categories: ", paste(head(small_categories, 10), collapse = ", "), "\n" + ) + } else { + message("Ident column '", ident, "' does not exist in obj@meta.data.") + } + } + + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title Transfer labels from a reference Seurat object to a query Seurat object +#' +#' @description Function to transfer labels from a reference Seurat object to a query Seurat object +#' using anchoring and transfer data methods from the Seurat package. It then visualizes the +#' reference and the combined objects using Uniform Manifold Approximation and Projection (UMAP). +#' +#' @param query_obj A Seurat object for which the labels are to be transferred. +#' @param reference_obj Alternative to `reference_path`. If provided, the path is not used to load +#' the reference data. +#' @param reference_path A character string indicating the file path to the reference Seurat object. +#' The path must exist. +#' @param reference_ident A character string specifying the name of the identity class to be used +#' from the reference Seurat object. Default is 'RNA_snn_res.0.3.ordered.ManualNames'. +#' @param anchors A list of anchors obtained from the FindTransferAnchors function. If NULL, the +#' @param new_ident A character string specifying the name of the new identity class to be +#' created in the query Seurat object. Default is obtained by replacing 'ordered' with +#' 'transferred' in reference_ident. +#' @param predictions_col A character string specifying the column in the metadata of the transferred +#' Seurat object containing the transferred labels. Default is 'predicted.id'. +#' @param predictions_score A character string specifying the column in the metadata of the transferred +#' Seurat object containing the scores of the transferred labels. Default is 'transferred.score'. +#' @param save_anchors A logical indicating whether to save the anchors as an RDS file. Default is TRUE. +#' @param reference_suffix A character string to be used as in the subtitle of the reference UMAP plot. +#' Default is 'REFERENCE.obj'. +#' @param plot_suffix A character string to be added to the UMAP with the new identity. Default is NULL. +#' @param plot_reference A logical indicating whether to plot the reference UMAP. Default is TRUE. +#' @param h Height for the saved image. Default: `12` +#' @param w Width for the saved image. Default: `9` +#' @param ... Additional arguments passed to the `Seurat.utils::clUMAP` function. +#' +#' @return The modified query Seurat object with the transferred labels as a new identity class. +#' +#' @examples +#' # combined.objX <- transferLabelsSeurat(reference_ident = 'RNA_snn_res.0.3.ordered.ManualNames', +#' # reference_obj = reference_obj, +#' # query_obj = combined.obj) +#' +#' @importFrom readr read_rds +#' @importFrom Seurat FindTransferAnchors TransferData AddMetaData +#' +#' @export +transferLabelsSeurat <- function( + query_obj, + reference_obj, + reference_path = NULL, + reference_ident, + anchors = NULL, + new_ident = gsub( + pattern = "ordered", + replacement = "transferred", + x = reference_ident + ), + predictions_col = "predicted.id", + predictions_score = sppp(new_ident, "score"), + save_anchors = TRUE, + reference_suffix = "REFERENCE.obj", + plot_suffix = NULL, + plot_reference = TRUE, + w = 12, h = 9, + ...) { + # + if (is.null(reference_obj)) { + iprint("Loading reference object:", basename(reference_path)) + stopifnot(file.exists(reference_path)) + reference_obj <- readr::read_rds(reference_path) + } else { + stopifnot(inherits(reference_obj, "Seurat") & min(dim(reference_obj)) > 10) + } + + # Report + nr.cl.ref <- CodeAndRoll2::nr.unique(reference_obj[[reference_ident]]) + message("reference_ident ", reference_ident, " has ", nr.cl.ref, " categories") + + # Visualize reference object + if (plot_reference) { + clUMAP( + obj = reference_obj, ident = reference_ident, label = F, + suffix = reference_suffix, sub = reference_suffix, + w = w, h = h, ... + ) + } + + if (is.null(anchors)) { + message("Calculating anchors. Provide anchors in 'anchors' to speed up.") + tictoc::tic() + anchors <- Seurat::FindTransferAnchors(reference = reference_obj, query = query_obj) + if (save_anchors) xsave(obj = anchors) + tictoc::toc() + } else { + message("Anchors provided") + } + + message("Transferring labels") + tictoc::tic() + transferred_clIDs <- Seurat::TransferData( + anchorset = anchors, + refdata = reference_obj@meta.data[, reference_ident], + ) + tictoc::toc() + + # Add New Labels to query object + query_obj <- Seurat::AddMetaData( + object = query_obj, metadata = transferred_clIDs[, predictions_col], + col.name = new_ident + ) + + # Add Labels assignment scores to query object + query_obj <- Seurat::AddMetaData( + object = query_obj, metadata = transferred_clIDs[, "prediction.score.max"], + col.name = predictions_score + ) + + qSeuViolin(feature = ppp(new_ident, 'score'), ident = new_ident, + sub = Seurat.utils:::.parseBasicObjStats(query_obj), + pt.size = 0.0, obj = query_obj) + + + # Visualize combined object + clUMAP( + ident = new_ident, obj = query_obj, suffix = plot_suffix, label = F, + w = w, h = h, ... + ) + qUMAP( + feature = predictions_score, obj = query_obj, suffix = plot_suffix, label = F, + w = w, h = h, ... + ) + + return(query_obj) +} + + +# _________________________________________________________________________________________________ +#' @title Extract meta.data Column Names Matching a Pattern +#' +#' @param obj A dataframe from which to extract column names. +#' @param pattern A regular expression pattern to match column names against. +#' +#' @return A character vector of column names matching the pattern. +#' +#' @examples +#' # Assuming 'df' is a dataframe with column names "azi.one", "azi.two", "other" +#' extract_matching_columns(df, "^azi\\.") +.metaColnames <- function(obj = combined.obj, pattern, perl = TRUE, ...) { + colz <- grep(pattern, colnames(obj@meta.data), value = TRUE, perl = perl, ...) + dput(colz) + return(colz) +} + + +# _________________________________________________________________________________________________ +#' @title Match and Translate Best Identity +#' +#' @description This function matches the best identity from `ident_to_rename` to `reference_ident` in an object, +#' in other words, it replaces original categories with the most frequent ones from the reference, +#' hence helps to filter out less important categories. +#' +#' @param obj The object to update. This object must have a `meta.data` attribute which is a data frame +#' containing columns named as `ident_to_rename` and `reference_ident`. +#' @param ident_to_rename A string. The name of the column in `obj@meta.data` that is used as the source of identities. +#' There is no default value for this parameter. +#' @param reference_ident A string. The name of the column in `obj@meta.data` that is used as the target of identities. +#' There is no default value for this parameter. +#' @param prefix A string to add to the new identity column name. Default is prefix = Reference. +#' @param suffix ... +#' @param new_ident_name A string. The name for the newly created identity column in `obj@meta.data`. +#' Default is a concatenation: kpp(prefix, ident_to_rename, "match.to", reference_ident) . +#' @param plot_suffix A string. The suffix to add to the final UMAP. +#' @param h Height for the saved image. Default: 12 +#' @param w Width for the saved image. Default: 9 +#' @param ... Additional parameters to be passed to `.replace_by_most_frequent_categories` function. +#' +#' @return An updated version of `obj` with an additional column in `obj@meta.data` named as `new_ident_name` +#' representing the new identity. The function also generates a UMAP plot based on this new identity. +#' +#' @seealso \code{\link[clUMAP]{clUMAP}}, \code{\link[kpp]{kpp}}, \code{\link[FixPlotName]{FixPlotName}}, +#' \code{\link[.replace_by_most_frequent_categories]{.replace_by_most_frequent_categories}} +#' +#' @examples +#' \dontrun{ +#' updated_obj <- matchBestIdentity(my_obj, "origin_identity", "target_identity") +#' } +#' @export +matchBestIdentity <- function( + obj, ident_to_rename, + reference_ident = GetOrderedClusteringRuns(obj)[1], + prefix = Reference, + suffix = gsub(prefix, "", x = reference_ident), + # to_suffix = "matched", + # to_suffix = FixPlotName(gsub(pattern = "[a-zA-Z_]", replacement = "", x = ident_to_rename)), + new_ident_name = kpp(prefix, ident_to_rename, "match.to", suffix), + plot_suffix = prefix, + w = 12, h = 9, + ...) { + stopifnot("colname prefix undefined" = !is.null(prefix)) + + dictionary <- obj@meta.data[, c(ident_to_rename, reference_ident)] + + translation <- .replace_by_most_frequent_categories( + df = dictionary, show_plot = TRUE, suffix_barplot = ident_to_rename, ... + ) + + obj@meta.data[, new_ident_name] <- translation[, 1] + + imessage("new ident name:", new_ident_name) + px <- clUMAP(ident = new_ident_name, obj = obj, suffix = plot_suffix, w = w, h = h, ...) + print(px) + return(obj) +} + + + +# _________________________________________________________________________________________________ +#' @title Find Best Match: Replace Categories by the Most Frequent Match +#' +#' @description Used for mapping identity columns across objects. This function replaces each +#' category in a query column of a data frame with the most frequently corresponding category in a +#' reference column. It calculates the assignment quality, reports it, and optionally plots it. +#' @param df A data frame containing the data. +#' @param query_col The name of the column in 'df' whose categories are to be replaced. By default, +#' the first column of 'df' is used. +#' @param ref_col The name of the column in 'df' used as reference for replacement. By default, the +#' second column of 'df' is used. +#' @param show_plot Logical, whether to plot assignment quality. Defaults to TRUE. +#' @param suffix_barplot Suffix for barplot. +#' @param ... Additional parameters passed to the qbarplot function. +#' @return A data frame with categories in 'query_col' replaced by the most frequent match from +#' 'ref_col'. +#' +#' @importFrom dplyr group_by summarise arrange filter +#' @examples +#' \dontrun{ +#' .replace_by_most_frequent_categories(df = my_data) +#' (MXX <- as.tibble(structure( +#' c( +#' "Adjut", "Adjut", "Yearn", "Adjut", "Dwarf", "Adjut", +#' "Dwarf", "Adjut", "Dwarf", "Yearn", "Dwarf", "Dwarf", "Dwarf", +#' "Yearn", "Dwarf", "Dwarf", "Dwarf", "Zebra", "Yucca", "Plyer", +#' "Blaze", "Blaze", "Dazed", "Blaze", "Swept", "Bold", "Vixen", +#' "Bold", "Swept", "Dazed", "Mirth", "Witch", "Vixen", "Dazed", +#' "Swept", "Mirth", "Swept", "Vexed", "Query", "Yolk" +#' ), +#' .Dim = c(20L, 2L), .Dimnames = +#' list(NULL, c("RNA_snn_res.0.1.ordered", "RNA_snn_res.0.3.ordered")) +#' ))) +#' +#' z <- .replace_by_most_frequent_categories(df = MXX) +#' head(cbind(MXX[, 1], z[, 1])) +#' } +.replace_by_most_frequent_categories <- function( + df, query_col = colnames(df)[1], + ref_col = colnames(df)[2], + show_plot = TRUE, + suffix_barplot = NULL, + ext = "png", + ...) { + # Convert to data frame if it is not + if (!is.data.frame(df)) { + df <- as.data.frame(df) + } + + cat_query <- unique(df[[query_col]]) + imessage(length(cat_query), "categories to rename in", query_col, ":", head(cat_query), "...") + + cat_ref <- unique(df[[ref_col]]) + imessage(length(cat_ref), "reference categories in", ref_col, ":", head(cat_ref), "...") + + # Create a table of the most frequent reference values for each query category + replacement_table <- df |> + dplyr::group_by(!!sym(query_col), !!sym(ref_col)) |> + dplyr::summarise(n = n(), .groups = "drop") |> + dplyr::arrange(!!sym(query_col), desc(n)) |> + dplyr::filter(!duplicated(!!sym(query_col))) + + replacement_table[[ref_col]] <- make.unique(replacement_table[[ref_col]]) + + # Calculate assignment quality + total_counts <- table(df[[query_col]]) + quality <- replacement_table$n / total_counts[replacement_table[[query_col]]] + names(quality) <- paste0(names(quality), "->", replacement_table[[ref_col]]) + + # Report assignment quality + message("Assignment quality (proportion of total matches):") + # print(setNames(quality, replacement_table[[query_col]])) + + # Plot assignment quality + if (show_plot) { + px <- ggExpress::qbarplot(quality, + label = percentage_formatter(quality, digitz = 1), + ext = ext, + suffix = suffix_barplot, + plotname = "Assignment Quality", + filename = make.names(kpp("Assignment Quality", suffix_barplot, ext)), + subtitle = paste( + "From", colnames(df)[1], "->", colnames(df)[2], "| median", + percentage_formatter(median(quality)), "\n", + sum(quality > 0.5), "clusters above 50% match" + ), + hline = 0.5, filtercol = -1, + xlab = paste("Best query match to reference"), + ylab = "Proportion of Total Matches", + ... + ) + print(px) + } + + # Replace the query values with the most frequent reference values + df[[query_col]] <- replacement_table[[ref_col]][match(df[[query_col]], replacement_table[[query_col]])] + + return(df) +} diff --git a/R/Seurat.Utils.R b/R/Seurat.Utils.R index cf8e2a4..1f2a558 100644 --- a/R/Seurat.Utils.R +++ b/R/Seurat.Utils.R @@ -1,17 +1,17 @@ # ____________________________________________________________________ # Seurat.utils ---- # ____________________________________________________________________ -# source("~/GitHub/Packages/Seurat.utils/R/Seurat.Utils.R") +# file.edit("~/GitHub/Packages/Seurat.utils/R/Seurat.Utils.R") # source("~/GitHub/Packages/Seurat.utils/R/Seurat.Utils.Metadata.R") # source("~/GitHub/Packages/Seurat.utils/R/Seurat.Utils.Visualization.R") # source("~/GitHub/Packages/Seurat.utils/R/Seurat.utils.less.used.R") -devtools::check_man("~/GitHub/Packages/Seurat.utils/UMAP.Major_Celltypes.128962c.reference.png") +# devtools::check_man("~/GitHub/Packages/Seurat.utils") # devtools::load_all("~/GitHub/Packages/Seurat.utils") # devtools::document("~/GitHub/Packages/Seurat.utils"); devtools::load_all("~/GitHub/Packages/Seurat.utils") -file.remove("~/GitHub/Packages/Seurat.utils/weight3.bar.png") +# file.remove("~/GitHub/Packages/Seurat.utils/weight3.bar.png") @@ -27,16 +27,17 @@ file.remove("~/GitHub/Packages/Seurat.utils/weight3.bar.png") #' #' @param obj A Seurat object to be processed. #' @param param.list A list of parameters used in the processing steps. -#' @param add.meta.fractions A boolean indicating whether to add meta data for fractions of cells in each cluster. Default: FALSE. -#' @param compute A boolean indicating whether to compute the results. Default: TRUE. -#' @param save A boolean indicating whether to save the results. Default: TRUE. -#' @param plot A boolean indicating whether to plot the results. Default: TRUE. +#' @param add.meta.fractions A boolean indicating whether to add meta data for fractions of cells in each cluster. Default: `FALSE`. +#' @param compute A boolean indicating whether to compute the results. Default: `TRUE`. +#' @param save A boolean indicating whether to save the results. Default: `TRUE`. +#' @param plot A boolean indicating whether to plot the results. Default: `TRUE`. #' @param nfeatures The number of variable genes to use. Default: 2000. #' @param variables.2.regress A list of variables to regress out. Default: NULL. #' @param n.PC The number of principal components to use. Default: 30. #' @param resolutions A list of resolutions to use for clustering. Default: c(0.1, 0.2, 0.3, 0.4, 0.5). #' @param reduction_input The reduction method to use as input for clustering & UMAP. Default: "pca". #' @param WorkingDir The working directory to save the results. Default: getwd(). +#' @param ... Additional parameters to be passed to `ScaleData()`. #' #' @return A Seurat object after applying scaling, PCA, UMAP, neighbor finding, and clustering. #' @@ -46,6 +47,7 @@ file.remove("~/GitHub/Packages/Seurat.utils/weight3.bar.png") #' #' @importFrom Seurat ScaleData RunPCA RunUMAP FindNeighbors FindClusters #' @importFrom tictoc tic toc +#' @importFrom harmony RunHarmony #' #' @export processSeuratObject <- function(obj, param.list = p, add.meta.fractions = FALSE, @@ -74,7 +76,7 @@ processSeuratObject <- function(obj, param.list = p, add.meta.fractions = FALSE, is.numeric(n.PC), is.numeric(resolutions), is.character(variables.2.regress) | is.null(variables.2.regress), "variables.2.regress is not found in @meta" = variables.2.regress %in% colnames(obj@meta.data) - ) + ) iprint("nfeatures:", nfeatures) iprint("n.PC:", n.PC) @@ -101,67 +103,80 @@ processSeuratObject <- function(obj, param.list = p, add.meta.fractions = FALSE, if (compute) { message("------------------- FindVariableFeatures -------------------") tic("FindVariableFeatures") - obj <- FindVariableFeatures(obj, mean.function = "FastExpMean", - dispersion.function = "FastLogVMR", nfeatures = nfeatures); toc() + obj <- FindVariableFeatures(obj, + mean.function = "FastExpMean", + dispersion.function = "FastLogVMR", nfeatures = nfeatures + ) + toc() - tic('calc.q99.Expression.and.set.all.genes') - obj <- calc.q99.Expression.and.set.all.genes(obj = obj, quantileX = .99); toc() + tic("calc.q99.Expression.and.set.all.genes") + obj <- calc.q99.Expression.and.set.all.genes(obj = obj, quantileX = .99) + toc() message("------------------- ScaleData -------------------") - tic('ScaleData') - obj <- ScaleData(obj, assay = "RNA", verbose = TRUE, vars.to.regress = variables.2.regress); toc() + tic("ScaleData") + obj <- ScaleData(obj, assay = "RNA", verbose = TRUE, vars.to.regress = variables.2.regress, ...) + toc() if (reduction_input == "harmony") { message("------------------- Harmony - EXPERIMENTAL -------------------") - m.REGR <- obj@meta.data[, variables.2.regress, drop=F] + m.REGR <- obj@meta.data[, variables.2.regress, drop = FALSE] any_regr_col_numeric <- sapply(m.REGR, is.numeric) - if(any(any_regr_col_numeric)){ + if (any(any_regr_col_numeric)) { print("Some of the regression variables are numeric:") print(any_regr_col_numeric) stop("harmony cannot regress numeric variables") } - obj$'regress_out' <- apply(m.REGR,1, kppu) - if (nr.unique(obj$'regress_out') > 25) { + obj$"regress_out" <- apply(m.REGR, 1, kppu) + if (nr.unique(obj$"regress_out") > 25) { warning("The number of regress_out categories is too many (>25), consider serially calling harmony on each variable.", immediate. = TRUE) } - if (min(table(obj$'regress_out')) < 5) { + if (min(table(obj$"regress_out")) < 5) { warning("The number of cells in some regress_out categories is too few (<5), consider serially calling harmony on each variable.", immediate. = TRUE) } - nr_new_layers <- nr.unique(combined.obj$'regress_out') - nr_existing_layers <- (length(Layers(combined.obj))-1)/2 - if( nr_existing_layers != nr_new_layers) { + nr_new_layers <- nr.unique(combined.obj$"regress_out") + nr_existing_layers <- (length(Layers(combined.obj)) - 1) / 2 + if (nr_existing_layers != nr_new_layers) { tic("Split layers by regress_out") - combined.obj[["RNA"]] <- split(combined.obj[["RNA"]], f = combined.obj$'regress_out'); toc() + combined.obj[["RNA"]] <- split(combined.obj[["RNA"]], f = combined.obj$"regress_out") + toc() } tic("RunHarmony") - obj <- harmony::RunHarmony(object = obj, group.by.vars = "regress_out", dims.use = 1:nPCs, plot_convergence = F); toc() + obj <- harmony::RunHarmony(object = obj, group.by.vars = "regress_out", dims.use = 1:nPCs, plot_convergence = FALSE) + toc() tic("JoinLayers") - obj <- JoinLayers(obj, assay = "RNA"); toc() - obj@misc$'harmony.params' <- c( "nPCs" = nPCs, "regress" = variables.2.regress) - + obj <- JoinLayers(obj, assay = "RNA") + toc() + obj@misc$"harmony.params" <- c("nPCs" = nPCs, "regress" = variables.2.regress) } message("------------------- PCA /UMAP -------------------") tic("PCA") - obj <- RunPCA(obj, npcs = n.PC, verbose = TRUE); toc() + obj <- RunPCA(obj, npcs = n.PC, verbose = TRUE) + toc() tic("UMAP") - obj <- SetupReductionsNtoKdimensions(obj, nPCs = n.PC, reduction_output = "umap", - reduction_input = reduction_input, dimensions = 3:2); toc() + obj <- SetupReductionsNtoKdimensions(obj, + nPCs = n.PC, reduction_output = "umap", + reduction_input = reduction_input, dimensions = 3:2 + ) + toc() message("------------------- FindNeighbors & Clusters -------------------") tic("FindNeighbors") - obj <- FindNeighbors(obj, reduction = reduction_input, dims = 1:n.PC); toc() + obj <- FindNeighbors(obj, reduction = reduction_input, dims = 1:n.PC) + toc() tic("FindClusters") - obj <- FindClusters(obj, resolution = resolutions); toc() + obj <- FindClusters(obj, resolution = resolutions) + toc() } @@ -221,35 +236,35 @@ processSeuratObject <- function(obj, param.list = p, add.meta.fractions = FALSE, #' @param ident Use this to specify a non-standard cluster identity, such as named clusters. #' `runDGEA` will use this ident explicitly for the DE analysis. Default: NULL. #' @param reorder.clusters Logical indicating whether to reorder clusters based on dimension. -#' Default: TRUE. +#' Default: `TRUE`. #' @param reorder.dimension Integer specifying the dimension for reordering (1 for x, -1 for y). #' Default: 1. #' @param add.combined.score Logical indicating whether to add a combined score to the markers. -#' Default: TRUE. +#' Default: `TRUE`. #' @param save.obj Logical indicating whether to save the modified Seurat object. -#' Default: TRUE. +#' Default: `TRUE`. #' @param directory Character string specifying the base directory for saving results. #' Default: OutDir #' @param dir_suffix Character string specifying the suffix for the subdirecotry directory. #' @param subdirectory Character string specifying the subdirectory for saving outputs within #' the base directory. Default: "DGEA + date". #' @param calculate.DGEA Logical determining if the DE analysis should be calculated. -#' Default: TRUE. +#' Default: `TRUE`. #' @param plot.DGEA Logical determining if results should be plotted. -#' Default: TRUE. +#' Default: `TRUE`. #' @param umap_caption Character string specifying the caption for the UMAP plot. Default: "". #' @param plot.av.enrichment.hist Logical indicating whether to plot the average enrichment histogram. -#' Default: TRUE. +#' Default: `TRUE`. #' @param plot.log.top.gene.stats Logical indicating whether to plot the log top gene statistics. #' @param auto.cluster.naming Logical indicating automatic labeling of clusters. -#' Default: TRUE. +#' Default: `TRUE`. #' @param clean.misc.slot Logical indicating whether to clean the misc slots of previous -#' clustering results. Default: TRUE. +#' clustering results. Default: `TRUE`. #' @param clean.meta.data Logical indicating whether to clean the metadata slots of -#' previous clustering results. Default: TRUE. +#' previous clustering results. Default: `TRUE`. #' @param n.cores Integer specifying the number of cores to use for parallel processing (multisession). #' Default: 1. -#' @param presto Logical indicating whether to use presto for DE analysis. Default: TRUE. +#' @param presto Logical indicating whether to use presto for DE analysis. Default: `TRUE`. #' @param WorkingDir Character string specifying the working directory. Default: getwd(). #' #' @importFrom future plan @@ -262,8 +277,8 @@ processSeuratObject <- function(obj, param.list = p, add.meta.fractions = FALSE, runDGEA <- function(obj, param.list = p, ident = NULL, - res.analyzed.DE = if(is.null(ident)) c(.1) else ident, # param.list$'res.analyzed.DE' - reorder.clusters = if(is.null(ident)) TRUE else FALSE, + res.analyzed.DE = if (is.null(ident)) c(.1) else ident, # param.list$'res.analyzed.DE' + reorder.clusters = if (is.null(ident)) TRUE else FALSE, reorder.dimension = 1, # ordering = if(any(!testNumericCompatible(res.analyzed.DE))) "no" else "ordered", # param.list$"cl.annotation" # ordering = "ordered", # param.list$"cl.annotation" @@ -282,10 +297,8 @@ runDGEA <- function(obj, clean.meta.data = TRUE, n.cores = 1, presto = TRUE, - WorkingDir = getwd() - ) { - - if(presto) require(presto) + WorkingDir = getwd()) { + if (presto) require(presto) message("\nWorkingDir: ", WorkingDir) # Assertions for input parameters @@ -302,7 +315,7 @@ runDGEA <- function(obj, # Log utilized parameters from param.list { - message("cl.annotation: ", if(reorder.clusters) paste("ordered:", reorder.dimension) else "no") + message("cl.annotation: ", if (reorder.clusters) paste("ordered:", reorder.dimension) else "no") message("test: ", param.list$"test") message("only.pos: ", param.list$"only.pos") message("---------------------------------") @@ -315,10 +328,12 @@ runDGEA <- function(obj, } # Record changes in @misc$p - obj@misc$p$"res.analyzed.DE" <- if(is.null(ident)) res.analyzed.DE else ident - obj@misc$p$"cl.annotation" <- if(is.null(ident)) { - if(reorder.clusters) reorder.dimension else "no" - } else "character" + obj@misc$p$"res.analyzed.DE" <- if (is.null(ident)) res.analyzed.DE else ident + obj@misc$p$"cl.annotation" <- if (is.null(ident)) { + if (reorder.clusters) reorder.dimension else "no" + } else { + "character" + } # Retrieve analyzed DE resolutions message("Resolutions analyzed:") @@ -335,7 +350,7 @@ runDGEA <- function(obj, message("Erasing up the meta.data clustering columns.") topMslots <- grepv("top.markers.res", names(obj@meta.data)) cl.ordered <- GetOrderedClusteringRuns(obj = obj) - obj@meta.data[, cl.ordered ] <- NULL + obj@meta.data[, cl.ordered] <- NULL # cl.names <- GetNamedClusteringRuns(obj = obj, pat = "^cl.names.*[0-1]\\.[0-9]", # find.alternatives = FALSE) # obj@meta.data[, c(cl.ordered, cl.names)] <- NULL @@ -346,15 +361,16 @@ runDGEA <- function(obj, message("Renumbering ----------------------------------------") for (i in 1:length(res.analyzed.DE)) { res <- res.analyzed.DE[i] - create_set_OutDir(p0(dir_DGEA, ppp("res", res))) + create_set_OutDir(paste0(dir_DGEA, ppp("res", res))) message(i) # Reorder clusters based on average expression of markers message("Reordering clusters along dimension: ", sign(reorder.dimension), "*", if (abs(reorder.dimension) == 1) "x" else "y") - obj <- AutoNumber.by.UMAP(obj = obj, - ident = GetClusteringRuns(res = res, obj = obj)[1], - dim = abs(reorder.dimension), reduction = "umap", - swap = (reorder.dimension < 0), plot = TRUE + obj <- AutoNumber.by.UMAP( + obj = obj, + ident = GetClusteringRuns(res = res, obj = obj)[1], + dim = abs(reorder.dimension), reduction = "umap", + swap = (reorder.dimension < 0), plot = TRUE ) } # end for loop } # end if reorder.clusters @@ -377,17 +393,17 @@ runDGEA <- function(obj, # Loop through each resolution setting to find markers ________________________________________ - if (n.cores>1) future::plan("multisession", workers = n.cores) + if (n.cores > 1) future::plan("multisession", workers = n.cores) if (calculate.DGEA) { message("Calclulating ----------------------------------------") for (i in 1:length(res.analyzed.DE)) { res <- res.analyzed.DE[i] tag.res <- ppp("res", res) - df.slot <- if(!is.null(ident)) ident else tag.res + df.slot <- if (!is.null(ident)) ident else tag.res message("Resolution: ", res, " -----------") - create_set_OutDir(p0(dir_DGEA, tag.res)) + create_set_OutDir(paste0(dir_DGEA, tag.res)) message("Ident.for.DEG: ", Idents.for.DEG[[i]]) Idents(obj) <- Idents.for.DEG[[i]] @@ -395,17 +411,17 @@ runDGEA <- function(obj, # Perform differential expression analysis tic("FindAllMarkers") df.markers <- Seurat::FindAllMarkers(obj, - verbose = TRUE, - test.use = param.list$"test", - logfc.threshold = param.list$"logfc.threshold", - return.thresh = param.list$"return.thresh", - min.pct = param.list$"min.pct", - min.diff.pct = param.list$"min.diff.pct", - min.cells.group = param.list$"min.cells.group", - max.cells.per.ident = param.list$"max.cells.per.ident", - only.pos = param.list$"only.pos", - - ); toc() + verbose = TRUE, + test.use = param.list$"test", + logfc.threshold = param.list$"logfc.threshold", + return.thresh = param.list$"return.thresh", + min.pct = param.list$"min.pct", + min.diff.pct = param.list$"min.diff.pct", + min.cells.group = param.list$"min.cells.group", + max.cells.per.ident = param.list$"max.cells.per.ident", + only.pos = param.list$"only.pos", + ) + toc() Stringendo::stopif(is.null(df.markers)) @@ -419,10 +435,9 @@ runDGEA <- function(obj, # Save results to disk fname <- ppp("df.markers", res) - ReadWriter::write.simple.tsv(df.markers, filename = fname, v = F) + ReadWriter::write.simple.tsv(df.markers, filename = fname, v = FALSE) df.markers.all[[i]] <- df.markers - xsave(df.markers, suffix = df.slot, v = F) - + xsave(df.markers, suffix = df.slot, v = FALSE) } # end for loop # Save final results to disk @@ -435,22 +450,22 @@ runDGEA <- function(obj, if (save.obj) { create_set_OutDir(WorkingDir) - tag <- if(is.null(ident)) kpp("res", res.analyzed.DE) else ident + tag <- if (is.null(ident)) kpp("res", res.analyzed.DE) else ident xsave(obj, suffix = kpp("w.DGEA", tag)) } } # end if calculate.DGEA # Loop through each resolution setting to find markers ________________________________________ if (plot.DGEA) { - message('Plotting results -----------------') + message("Plotting results -----------------") for (i in 1:length(res.analyzed.DE)) { res <- res.analyzed.DE[i] - message('Resolution: ', res) + message("Resolution: ", res) tag.res <- ppp("res", res) df.slot <- if (!is.null(ident)) ident else tag.res - create_set_OutDir(p0(dir_DGEA, df.slot)) + create_set_OutDir(paste0(dir_DGEA, df.slot)) df.markers <- obj@misc$"df.markers"[[df.slot]] Stringendo::stopif(is.null(df.markers)) @@ -465,7 +480,7 @@ runDGEA <- function(obj, # Automatic cluster labeling by top gene ________________________________________ if (auto.cluster.naming) { - message('Automatic cluster labeling by top gene.') + message("Automatic cluster labeling by top gene.") obj <- StoreAllMarkers(df_markers = df.markers, res = res, obj = obj) obj <- AutoLabelTop.logFC(group.by = Idents.for.DEG[[i]], obj = obj, plot.top.genes = FALSE) # already plotted above @@ -475,28 +490,30 @@ runDGEA <- function(obj, # Plot per-cluster gene enrichment histogram ________________________________________ if (plot.av.enrichment.hist) { - message('Plotting per-cluster gene enrichment histogram.') + message("Plotting per-cluster gene enrichment histogram.") # create_set_OutDir(directory, subdirectory) df.markers.tbl <- as_tibble(df.markers) - df.markers.tbl$'cluster' <- as.character(df.markers.tbl$'cluster') - p.deg.hist <- ggpubr::gghistogram(df.markers.tbl, x = "avg_log2FC", - title = "Number of enriched genes per cluster", - subtitle = "Binned by Log2(FC)", - caption = paste(res, "| vertical line at FC of 2."), - rug = TRUE, - color = "cluster", fill = "cluster", - facet.by = 'cluster', xlim = c(0,3), - ylab = "Nr. D.E. Genes") + + df.markers.tbl$"cluster" <- as.character(df.markers.tbl$"cluster") + p.deg.hist <- ggpubr::gghistogram(df.markers.tbl, + x = "avg_log2FC", + title = "Number of enriched genes per cluster", + subtitle = "Binned by Log2(FC)", + caption = paste(res, "| vertical line at FC of 2."), + rug = TRUE, + color = "cluster", fill = "cluster", + facet.by = "cluster", xlim = c(0, 3), + ylab = "Nr. D.E. Genes" + ) + geom_vline(xintercept = 1) + theme_linedraw() - qqSave(ggobj = p.deg.hist, w = 10, h = 6, title = ppp("Enrichment log2FC per cluster",res)) + qqSave(ggobj = p.deg.hist, w = 10, h = 6, title = ppp("Enrichment log2FC per cluster", res)) } # Plot per-cluster enriched gene counts ________________________________________ if (plot.log.top.gene.stats) { - message('Plotting per-cluster enriched gene counts.') + message("Plotting per-cluster enriched gene counts.") # Filter genes with avg_log2FC > 2 lfc2_hiSig_genes <- df.markers |> @@ -510,14 +527,15 @@ runDGEA <- function(obj, # Get the number of genes per cluster (NrOfHighlySignLFC2_genes <- lfc2_hiSig_genes |> - summarise(n = n()) |> - deframe() |> - sortbyitsnames()) - - qbarplot(NrOfHighlySignLFC2_genes, label = NrOfHighlySignLFC2_genes, - plotname = "Number of diff. genes per cluster", - sub = 'Genes with avg_log2FC > 1 and p_val_adj < 0.05', - xlab = "Clusters", ylab = "Number of diff. genes" + summarise(n = n()) |> + deframe() |> + sortbyitsnames()) + + qbarplot(NrOfHighlySignLFC2_genes, + label = NrOfHighlySignLFC2_genes, + plotname = "Number of diff. genes per cluster", + sub = "Genes with avg_log2FC > 1 and p_val_adj < 0.05", + xlab = "Clusters", ylab = "Number of diff. genes" ) # Write out gene lists per cluster ________________________________________ @@ -531,12 +549,12 @@ runDGEA <- function(obj, names(genes_list) <- unique(lfc2_hiSig_genes$"cluster") genes_list <- sortbyitsnames(genes_list) - names(genes_list) <- ppp("cl", names(genes_list), top_genes,"DGs") + names(genes_list) <- ppp("cl", names(genes_list), top_genes, "DGs") # write out the gene list, each element to a txt file. - create_set_OutDir(p0(dir_DGEA, ppp("res", res), "/top_genes")) - for (i in 1:l(genes_list)) { - write.simple.vec(input_vec = genes_list[[i]], filename = names(genes_list)[i], v = F ) + create_set_OutDir(paste0(dir_DGEA, ppp("res", res), "/top_genes")) + for (i in 1:length(genes_list)) { + write.simple.vec(input_vec = genes_list[[i]], filename = names(genes_list)[i], v = FALSE) } # for cluster } } # end if plot.log.top.gene.stats @@ -662,7 +680,7 @@ parallel.computing.by.future <- function(cores = 4, maxMemSize = 4000 * 1024^2) #' @param genes A vector of gene names to be intersected with the Seurat object. #' @param obj A Seurat object containing gene expression data. #' @param n_genes_shown Number of missing genes to be printed. Default: 10. -#' @param strict All genes to be present in the Seurat object? Default: TRUE. +#' @param strict All genes to be present in the Seurat object? Default: `TRUE`. #' @param verbose verbose #' @return A vector of gene names that are found both in the input 'genes' vector and the #' Seurat object. @@ -736,7 +754,7 @@ IntersectGeneLsWithObject <- function(genes, obj = combined.obj, n_genes_shown = #' @param above The expression level threshold above which genes are considered noticeably #' expressed. Default: 0. #' @param sort A logical flag indicating whether to sort the filtered genes by their expression -#' levels in decreasing order. Default: FALSE. +#' levels in decreasing order. Default: `FALSE`. #' @return A vector of gene names that are found both in the input 'genes' vector and the Seurat #' object, and have expression levels above the specified 'above' threshold. If `sort` is TRUE, #' these genes are returned in decreasing order of their expression levels. @@ -888,11 +906,13 @@ addToMiscOrToolsSlot <- function(obj, pocket_name = "misc", # _________________________________________________________________________________________________ #' @title Display Slots in the @tools of an Seurat Object #' -#' @description -#' `showToolsSlots` prints the names of slots in the `@tools` of a given object. +#' @description `showToolsSlots` prints the names of slots in the `@tools` of a given object. #' It specifically targets list elements, skipping over data frames and other non-list objects. #' #' @param obj An object whose `@tools` slot needs to be examined. +#' @param max.level The maximum level of nesting to print. +#' @param subslot The name of a sub-slot within the `@tools` slot to examine. +#' @param ... Additional arguments to be passed to `str`. #' #' @details #' The function iterates over the slots in the `@tools` of `obj`. If a slot is a list @@ -930,7 +950,7 @@ showToolsSlots <- function(obj, max.level = 1, subslot = NULL, ...) { #' @description See `showToolsSlots` for details. Prints the names of slots in the `@misc` of a given object. #' It specifically targets list elements, skipping over data frames and other non-list objects. #' -#' @param obj An object whose `@misc` slot needs to be examined. Default: combined.obj +#' @param obj An object whose `@misc` slot needs to be examined. Default: `combined.obj` #' @param max.level Max depth to dive into sub-elements. #' @param subslot A subslot within `@misc`. #' @param ... ... @@ -956,15 +976,15 @@ showMiscSlots <- function(obj = combined.obj, max.level = 1, subslot = NULL, #' @title calc.q99.Expression.and.set.all.genes #' @description Calculate the gene expression of the e.g.: 99th quantile (expression in the top 1% cells). -#' @param obj Seurat object, Default: combined.obj +#' @param obj Seurat object, Default: `combined.obj` #' @param quantileX Quantile level, Default: 0.9 #' @param max.cells Max number of cells to do the calculation on. Downsample if excdeeded. Default: 1e+05 #' @param slot slot in the Seurat object. Default: 'data' #' @param assay RNA or integrated assay, Default: c("RNA", "integrated")[1] -#' @param set.misc Create the "all.genes" variable in @misc? Default: TRUE -#' @param assign_to_global_env Create the "all.genes" variable in the global env?, Default: TRUE -#' @param plot Plot the expression distribution? Default: TRUE -#' @param show Show the distribution plot? Default: TRUE +#' @param set.misc Create the "all.genes" variable in @misc? Default: `TRUE`. +#' @param assign_to_global_env Create the "all.genes" variable in the global env?, Default: `TRUE`. +#' @param plot Plot the expression distribution? Default: `TRUE`. +#' @param show Show the distribution plot? Default: `TRUE`. #' @examples #' \dontrun{ #' if (interactive()) { @@ -995,8 +1015,7 @@ calc.q99.Expression.and.set.all.genes <- function( assign_to_global_env = TRUE, suffix = as.character(substitute(obj)), plot = TRUE, - show = TRUE - ) { + show = TRUE) { message("slot: ", slot, " assay: ", assay, ".\n") tictoc::tic("calc.q99.Expression.and.set.all.genes") @@ -1016,7 +1035,7 @@ calc.q99.Expression.and.set.all.genes <- function( # Get the data matrix ____________________________________________________________ assay_data <- obj@assays[[assay]] if (obj@version >= "5") { - if(assay == "RNA") { + if (assay == "RNA") { layers <- assay_data@layers message(length(layers), " layers in RNA assay") stopifnot(slot %in% names(layers)) @@ -1038,8 +1057,10 @@ calc.q99.Expression.and.set.all.genes <- function( # Calculate the number of cells in the top quantile (e.g.: 99th quantile) that is # required to for gene expression to be >0 n.cells <- floor(ncol(data_mtx) * (1 - quantileX)) - message("Each gene has to be expressed in min. ", n.cells, " cells, to have >0 quantile-expression\n", - "quantileX: ", quantileX, " max.cells: ", max.cells) + message( + "Each gene has to be expressed in min. ", n.cells, " cells, to have >0 quantile-expression\n", + "quantileX: ", quantileX, " max.cells: ", max.cells + ) # Prepare for plotting ____________________________________________________________ qname <- paste0("q", quantileX * 100) @@ -1053,22 +1074,22 @@ calc.q99.Expression.and.set.all.genes <- function( qnameP <- paste0(100 * quantileX, "th quantile") # Plot the distribution of gene expression in the 99th quantile _________________________________ - if(plot){ + if (plot) { pobj <- ggExpress::qhistogram(log2.gene.expr.of.the.90th.quantile, - plotname = paste("Gene expression in the", qnameP, " in", suffix), - ext = "pdf", breaks = 30, - subtitle = kollapse(pc_TRUE(expr.q99 > 0, NumberAndPC = TRUE), " genes have ", qname, " expr. > 0 (in ", n.cells, " cells)."), - caption = paste(n.cells, "cells in", qnameP, "from", ncol(data_mtx), "cells in (downsampled) object."), - suffix = suffix, - xlab = paste0("log2(expr. in the ", qnameP, "quantile+1) [UMI]"), - ylab = "Nr. of genes", - plot = T, save = TRUE, - vline = .15, - filtercol = TRUE, - palette_use = "npg" + plotname = paste("Gene expression in the", qnameP, " in", suffix), + ext = "pdf", breaks = 30, + subtitle = kollapse(pc_TRUE(expr.q99 > 0, NumberAndPC = TRUE), " genes have ", qname, " expr. > 0 (in ", n.cells, " cells)."), + caption = paste(n.cells, "cells in", qnameP, "from", ncol(data_mtx), "cells in (downsampled) object."), + suffix = suffix, + xlab = paste0("log2(expr. in the ", qnameP, "quantile+1) [UMI]"), + ylab = "Nr. of genes", + plot = TRUE, save = TRUE, + vline = .15, + filtercol = TRUE, + palette_use = "npg" ) tictoc::toc() - if(show) print(pobj) + if (show) print(pobj) } @@ -1101,7 +1122,7 @@ calc.q99.Expression.and.set.all.genes <- function( #' @param pattern_NC A character vector of patterns to filter out non-coding gene symbols. #' Default: c("^AC.", "^AL.", "^c[1-9]orf", "\\.AS[1-9]$"). #' @param v "verbose" Whether to print the number of genes before and after filtering. -#' @param unique Whether to return unique gene symbols. Default: TRUE. +#' @param unique Whether to return unique gene symbols. Default: `TRUE`. #' @param ... Additional arguments to pass to \code{\link[stringr]{str_detect}}. #' #' @return A character vector of filtered gene symbols. @@ -1114,15 +1135,17 @@ calc.q99.Expression.and.set.all.genes <- function( #' @importFrom stringr str_detect #' @export #' -filterNcGenes <- function(genes, pattern_NC = c("^A[CFLP][0-9]{6}", "^Z[0-9]{5}", - "^LINC0[0-9]{4}", "^C[1-9]+orf[1-9]+", - "[-|\\.]AS[1-9]*$", "[-|\\.]DT[1-9]*$", - "^MIR[1-9]", "^SNHG[1-9]"), +filterNcGenes <- function(genes, pattern_NC = c( + "^A[CFLP][0-9]{6}", "^Z[0-9]{5}", + "^LINC0[0-9]{4}", "^C[1-9]+orf[1-9]+", + "[-|\\.]AS[1-9]*$", "[-|\\.]DT[1-9]*$", + "^MIR[1-9]", "^SNHG[1-9]" + ), v = TRUE, unique = TRUE, ...) { - # Input assertions - stopifnot(is.character(genes), length(genes) > 0, - is.character(pattern_NC), length(pattern_NC) > 0 + stopifnot( + is.character(genes), length(genes) > 0, + is.character(pattern_NC), length(pattern_NC) > 0 ) # Filter the genes @@ -1133,7 +1156,7 @@ filterNcGenes <- function(genes, pattern_NC = c("^A[CFLP][0-9]{6}", "^Z[0-9]{5}" genes_kept <- genes[stringr::str_detect(genes, combined_pattern, negate = TRUE)] # Report original and final list sizes and percentage remaining - if(v) { + if (v) { original_length <- length(genes) filtered_length <- length(genes_kept) percentage_remaining <- (filtered_length / original_length) * 100 @@ -1146,7 +1169,7 @@ filterNcGenes <- function(genes, pattern_NC = c("^A[CFLP][0-9]{6}", "^Z[0-9]{5}" # Output assertions stopifnot(is.character(genes_kept), length(genes_kept) <= original_length) - if(unique) genes_kept <- unique(genes_kept) + if (unique) genes_kept <- unique(genes_kept) return(genes_kept) } @@ -1184,7 +1207,6 @@ RenameClustering <- function( suffix.plot = "", plot_umaps = TRUE, ...) { - NewX <- CodeAndRoll2::translate( vec = as.character(obj@meta.data[, orig.ident]), old = names(namedVector), @@ -1201,7 +1223,7 @@ RenameClustering <- function( suffix.plot <- if (nchar(suffix.plot)) make.names(suffix.plot) print(clUMAP(orig.ident, suffix = suffix.plot, sub = suffix.plot, obj = obj, ...)) print(clUMAP(new.ident, suffix = suffix.plot, sub = suffix.plot, obj = obj, ...)) - clUMAP(new.ident, suffix = suffix.plot, sub = suffix.plot, label = F, obj = obj, ...) + clUMAP(new.ident, suffix = suffix.plot, sub = suffix.plot, label = FALSE, obj = obj, ...) } else { iprint("New ident:", new.ident) } @@ -1274,10 +1296,10 @@ getClusterNames <- function(obj = combined.obj, ident = GetClusteringRuns(obj)[2 #' #' @description The `GetClusteringRuns` function retrieves metadata column names associated with #' clustering runs, based on a pattern to match, `"*snn_res.[0-9].[0-9]$"`, by default. -#' @param obj Seurat object, Default: combined.obj -#' @param res Clustering resoluton to use, Default: FALSE +#' @param obj Seurat object, Default: `combined.obj` +#' @param res Clustering resoluton to use, Default: `FALSE`. #' @param pat Pattern to match, Default: `*snn_res.*[0-9]$` -#' @param v +#' @param v verbose, Default: `TRUE`. #' #' @return Prints and returns the sorted unique cluster names as a character vector. #' @examples @@ -1290,12 +1312,11 @@ getClusterNames <- function(obj = combined.obj, ident = GetClusteringRuns(obj)[2 GetClusteringRuns <- function(obj = combined.obj, res = FALSE, pat = "*snn_res.[0-9].[0-9]+$", v = TRUE) { - # Get clustering results clustering.results <- sort(CodeAndRoll2::grepv(x = colnames(obj@meta.data), pattern = pat)) # Check if no clustering results were found - if (identical(clustering.results, character(0))) if(v) warning("No matching (simple) clustering column found!", immediate. = TRUE) + if (identical(clustering.results, character(0))) if (v) warning("No matching (simple) clustering column found!", immediate. = TRUE) if (!isFALSE(res)) { # Extract numeric values from clustering.results @@ -1303,11 +1324,10 @@ GetClusteringRuns <- function(obj = combined.obj, # Filter clustering.results based on the numeric vector res clustering.results <- clustering.results[clustering.res.found.numeric %in% res] - if(length(clustering.results) == 0) warning("No clustering matches `res`!", immediate. = TRUE) - + if (length(clustering.results) == 0) warning("No clustering matches `res`!", immediate. = TRUE) } - if(v) { + if (v) { message("Clustering runs found:") dput(clustering.results) } @@ -1320,13 +1340,13 @@ GetClusteringRuns <- function(obj = combined.obj, #' #' @description The `GetNamedClusteringRuns` function retrieves metadata column names associated with #' non-numeric ("named") clustering runs, based on a pattern to match, `"Name|name"`, by default. -#' @param obj Seurat object, Default: combined.obj +#' @param obj Seurat object, Default: `combined.obj` #' @param res Clustering resoluton to use, Default: c(FALSE, 0.5)[1] -#' @param topgene Match clustering named after top expressed gene (see vertesy/Seurat.pipeline/~Diff gene expr.), Default: FALSE +#' @param topgene Match clustering named after top expressed gene (see vertesy/Seurat.pipeline/~Diff gene expr.), Default: `FALSE`. #' @param pat Pattern to match, Default: '^cl.names.Known.*[0,1]\.[0-9]$' #' @param find.alternatives If TRUE, tries to find alternative clustering runs with -#' the same resolution, Default: TRUE -#' @param v Verbose output, Default: TRUE +#' the same resolution, Default: `TRUE`. +#' @param v Verbose output, Default: `TRUE`. #' #' @examples #' \dontrun{ @@ -1341,18 +1361,19 @@ GetNamedClusteringRuns <- function( pat = c("^cl.names.top.gene.+[0-9]\\.[0-9]", "Name|name")[2], find.alternatives = TRUE, v = TRUE) { - if (res) pat <- gsub(x = pat, pattern = "\\[.*\\]", replacement = res) if (topgene) pat <- gsub(x = pat, pattern = "Known", replacement = "top") clustering.results <- CodeAndRoll2::grepv(x = colnames(obj@meta.data), pattern = pat) if (identical(clustering.results, character(0))) { - if(v) warning("No matching (named) clustering column found! Trying GetClusteringRuns(..., pat = '*_res.*[0,1]\\.[0-9]$)", immediate. = TRUE ) - if (find.alternatives) clustering.results <- - GetClusteringRuns(obj = obj, res = FALSE, pat = "*_res.*[0,1]\\.[0-9]$", v = F) + if (v) warning("No matching (named) clustering column found! Trying GetClusteringRuns(..., pat = '*_res.*[0,1]\\.[0-9]$)", immediate. = TRUE) + if (find.alternatives) { + clustering.results <- + GetClusteringRuns(obj = obj, res = FALSE, pat = "*_res.*[0,1]\\.[0-9]$", v = FALSE) + } } - if(v) dput(clustering.results) + if (v) dput(clustering.results) return(clustering.results) } @@ -1363,8 +1384,8 @@ GetNamedClusteringRuns <- function( #' @title GetOrderedClusteringRuns #' #' @description Get Clustering Runs: metadata column names. -#' @param obj Seurat object, Default: combined.obj. -#' @param res Clustering resoluton to use, Default: FALSE +#' @param obj Seurat object, Default: `combined.obj`. +#' @param res Clustering resoluton to use, Default: `FALSE`. #' @param pat Pattern to match, Default: '*snn_res.*[0,1]\.[0-9]\.ordered$' #' @examples #' \dontrun{ @@ -1388,7 +1409,7 @@ GetOrderedClusteringRuns <- function(obj = combined.obj, res = FALSE, #' @title GetNumberOfClusters #' #' @description Get Number Of Clusters # -#' @param obj Seurat object, Default: combined.obj +#' @param obj Seurat object, Default: `combined.obj` #' @examples #' \dontrun{ #' if (interactive()) { @@ -1411,20 +1432,20 @@ GetNumberOfClusters <- function(obj = combined.obj) { # Get Number Of Clusters #' #' @description Calculates the average of a metadata column (numeric) per cluster. #' @param col_name The name of the column for which the average is calculated. Default: 'Score.GO.0006096'. -#' @param plot.UMAP.too Whether to plot a UMAP as well. Default: TRUE. -#' @param return.plot Whether to return the plot. Default: FALSE. -#' @param obj The main Seurat object used for calculations. Default: combined.obj. +#' @param plot.UMAP.too Whether to plot a UMAP as well. Default: `TRUE`. +#' @param return.plot Whether to return the plot. Default: `FALSE`. +#' @param obj The main Seurat object used for calculations. Default: `combined.obj`. #' @param split_by Cluster to split by. Default: First entry of GetNamedClusteringRuns(). -#' @param scale.zscore Whether to scale z-scores. Default: FALSE. -#' @param simplify Whether to simplify the result. Default: TRUE. -#' @param plotit Whether to plot the results. Default: TRUE. -#' @param histogram Whether to produce a histogram. Default: FALSE. +#' @param scale.zscore Whether to scale z-scores. Default: `FALSE`. +#' @param simplify Whether to simplify the result. Default: `TRUE`. +#' @param plotit Whether to plot the results. Default: `TRUE`. +#' @param histogram Whether to produce a histogram. Default: `FALSE`. #' @param nbins The number of bins for the histogram. Default: 50. #' @param suffix Suffix added to the filename. Default: NULL. #' @param stat Statistical method applied, "mean" or "median". Default: "median". #' @param quantile.thr The threshold for quantiles. Default: 0.9. -#' @param absolute.thr Absolute threshold used in computations. Default: FALSE. -#' @param filter The filter mode: 'above', 'below', or FALSE. Default: FALSE. +#' @param absolute.thr Absolute threshold used in computations. Default: `FALSE`. +#' @param filter The filter mode: 'above', 'below', or FALSE. Default: `FALSE`. #' @param ylab.text Text for the y-axis label. Default: "Cluster" followed by the statistical method and "score". #' @param title Title for the plot. Default: "Cluster" followed by the statistical method and column name. #' @param subtitle The subtitle for the plot. Default: NULL. @@ -1432,7 +1453,7 @@ GetNumberOfClusters <- function(obj = combined.obj) { # Get Number Of Clusters #' @param height The height of the plot. Default: 6. #' @param ... Additional parameters passed to the internally called functions. #' @param xlb The label for the x-axis. Default depends on the 'absolute.thr' parameter. -#' @param fname The filename for the plot. Default is based on column name and split_by value. +#' @param fname The filename for the plot. Default: based on column name and split_by value. #' @export #' @importFrom Stringendo percentage_formatter @@ -1559,9 +1580,9 @@ calc.cluster.averages <- function( #' #' @description Plot gene expression based on the expression at the 90th quantile #' (so you will not lose genes expressed in few cells). -#' @param obj Seurat object, Default: combined.obj +#' @param obj Seurat object, Default: `combined.obj` #' @param gene gene of interest, Default: 'ACTB' -#' @param filterZero Remove genes whose quantile-90 expression in 0? Default: TRUE +#' @param filterZero Remove genes whose quantile-90 expression in 0? Default: `TRUE`. #' @examples #' \dontrun{ #' if (interactive()) { @@ -1618,7 +1639,7 @@ plot.expression.rank.q90 <- function(obj = combined.obj, gene = "ACTB", filterZe #' @title set.mm #' #' @description Helps to find metadata columns. It creates a list with the names of of 'obj@meta.data'. -#' @param obj Seurat object, Default: combined.obj +#' @param obj Seurat object, Default: `combined.obj` #' @examples #' \dontrun{ #' if (interactive()) { @@ -1662,7 +1683,7 @@ ww.get.1st.Seur.element <- function(obj) { #' @title Recall all.genes global variable from a Seurat object #' #' @description all.genes set by calc.q99.Expression.and.set.all.genes() # -#' @param obj Seurat object, Default: combined.obj +#' @param obj Seurat object, Default: `combined.obj` #' @examples #' \dontrun{ #' if (interactive()) { @@ -1701,7 +1722,7 @@ recall.all.genes <- function(obj = combined.obj, overwrite = FALSE) { #' @title recall.meta.tags.n.datasets #' #' @description Recall meta.tags from obj@misc to "meta.tags" in the global environment. -#' @param obj Seurat object, Default: combined.obj +#' @param obj Seurat object, Default: `combined.obj` #' @examples #' \dontrun{ #' if (interactive()) { @@ -1746,8 +1767,8 @@ recall.meta.tags.n.datasets <- function(obj = combined.obj) { #' @title recall.parameters #' #' @description Recall parameters from obj@misc to "p" in the global environment. -#' @param obj Seurat object, Default: combined.obj -#' @param overwrite Overwrite already existing in environment? Default: FALSE +#' @param obj Seurat object, Default: `combined.obj` +#' @param overwrite Overwrite already existing in environment? Default: `FALSE`. #' @examples #' \dontrun{ #' if (interactive()) { @@ -1766,7 +1787,7 @@ recall.parameters <- function(obj = combined.obj, overwrite = FALSE) { if (p_found) message(" -> Variable 'p' exits in the global namespace.") if (!p_found | (p_found & overwrite == TRUE)) { - MarkdownHelpers::ww.assign_to_global(name = "p", value = obj@misc$"p", verbose = F) + MarkdownHelpers::ww.assign_to_global(name = "p", value = obj@misc$"p", verbose = FALSE) message("p is now (re)defined in the global environment.") } else { message("p not overwritten.") @@ -1782,8 +1803,8 @@ recall.parameters <- function(obj = combined.obj, overwrite = FALSE) { #' @title recall.genes.ls #' #' @description Recall genes.ls from obj@misc to "genes.ls" in the global environment. -#' @param obj Seurat object, Default: combined.obj -#' @param overwrite Overwrite already existing in environment? Default: FALSE +#' @param obj Seurat object, Default: `combined.obj` +#' @param overwrite Overwrite already existing in environment? Default: `FALSE`. #' @examples #' \dontrun{ #' if (interactive()) { @@ -1821,7 +1842,7 @@ recall.genes.ls <- function(obj = combined.obj, overwrite = FALSE) { # genes.ls #' #' @param obj Seurat object to update; Default: `combined.obj`. #' @param params List of parameters to save; Default: `p`. -#' @param overwrite Logical indicating if existing parameters should be overwritten; Default: TRUE. +#' @param overwrite Logical indicating if existing parameters should be overwritten; Default: `TRUE`. #' #' @examples #' \dontrun{ @@ -2014,7 +2035,6 @@ subsetSeuObjByIdent <- function( ident = GetClusteringRuns()[1], identGroupKeep, invert = FALSE) { - tic("subsetSeuObjByIdent") # Input checks stopifnot( @@ -2055,7 +2075,7 @@ subsetSeuObjByIdent <- function( #' @param obj A Seurat object to subset. Default: the i-th element of the list 'ls.Seurat'. #' @param fractionCells The fraction of the object's data to keep. Default: 0.25. #' @param nCells If set to a number greater than 1, indicates the absolute number of cells to keep. -#' If FALSE, the function uses 'fractionCells' to determine the number of cells. Default: FALSE. +#' If FALSE, the function uses 'fractionCells' to determine the number of cells. Default: `FALSE`. #' @param seed A seed for random number generation to ensure reproducible results. Default: 1989. #' @export #' @importFrom Stringendo percentage_formatter @@ -2241,29 +2261,28 @@ downsampleSeuObjByIdentAndMaxcells <- function(obj, #' v = TRUE #' ) #' - -RelabelSmallCategories <- function(obj, col_in, backup_col_name = ppp(col_in, "orig"), min_count = 100, small_label = "Other", v = T) { +RelabelSmallCategories <- function(obj, col_in, backup_col_name = ppp(col_in, "orig"), min_count = 100, small_label = "Other", v = TRUE) { # Input assertions stopifnot( - inherits(obj, "Seurat"), # Check if obj is a Seurat object - is.character(col_in), length(col_in) == 1, # col_in is a single string - col_in %in% colnames(obj@meta.data), # col_in exists in metadata - is.character(backup_col_name), length(backup_col_name) == 1, # backup_col_name is a single string - is.numeric(min_count), min_count > 0, # min_count is a positive number - is.character(small_label), length(small_label) == 1, # small_label is a single string - is.logical(v), length(v) == 1 # v is a single logical value + inherits(obj, "Seurat"), # Check if obj is a Seurat object + is.character(col_in), length(col_in) == 1, # col_in is a single string + col_in %in% colnames(obj@meta.data), # col_in exists in metadata + is.character(backup_col_name), length(backup_col_name) == 1, # backup_col_name is a single string + is.numeric(min_count), min_count > 0, # min_count is a positive number + is.character(small_label), length(small_label) == 1, # small_label is a single string + is.logical(v), length(v) == 1 # v is a single logical value ) - message('backup_col_name: ', backup_col_name) + message("backup_col_name: ", backup_col_name) categories <- obj@meta.data[[backup_col_name]] <- obj@meta.data[[col_in]] # Extract the specified metadata column - category_counts <- table(categories) # Count occurrences of each category + category_counts <- table(categories) # Count occurrences of each category small_categories <- names(category_counts[category_counts < min_count]) # Identify small categories - new_categories <- as.character(categories) # Copy original categories - new_categories[new_categories %in% small_categories] <- small_label # Relabel small categories - obj@meta.data[[col_in]] <- new_categories # Add new column to metadata + new_categories <- as.character(categories) # Copy original categories + new_categories[new_categories %in% small_categories] <- small_label # Relabel small categories + obj@meta.data[[col_in]] <- new_categories # Add new column to metadata - if (v) { # Verbose output + if (v) { # Verbose output total_cells <- length(categories) num_small_categories <- length(small_categories) num_large_categories <- length(category_counts) - num_small_categories @@ -2274,7 +2293,7 @@ RelabelSmallCategories <- function(obj, col_in, backup_col_name = ppp(col_in, "o message(sprintf("Cells in relabeled categories: %d (%.2f%% of total)", sum(category_counts[small_categories]), percent_small_cells)) } - return(obj) # Return the modified Seurat object + return(obj) # Return the modified Seurat object } @@ -2365,15 +2384,15 @@ removeResidualSmallClusters <- function( #' @export dropLevelsSeurat <- function(obj = combined.obj, verbose = TRUE, also.character = FALSE, only = NULL, exclude = NULL) { - stopifnot(is(obj, "Seurat")) META <- obj@meta.data names.meta <- colnames(obj@meta.data) - stopifnot(is.logical(verbose), - is.logical(also.character), - is.null(only) | only %in% names.meta, - is.null(exclude) | exclude %in% names.meta + stopifnot( + is.logical(verbose), + is.logical(also.character), + is.null(only) | only %in% names.meta, + is.null(exclude) | exclude %in% names.meta ) colclasses <- sapply(META, class) @@ -2382,8 +2401,12 @@ dropLevelsSeurat <- function(obj = combined.obj, verbose = TRUE, also.character if (!is.null(only)) drop_in_these <- only if (!is.null(exclude)) drop_in_these <- setdiff(drop_in_these, exclude) - if (verbose) message("Dropping levels in ", length(drop_in_these), " identities:\n", - kppc(drop_in_these)) + if (verbose) { + message( + "Dropping levels in ", length(drop_in_these), " identities:\n", + kppc(drop_in_these) + ) + } for (i in 1:length(drop_in_these)) { colX <- drop_in_these[i] @@ -2402,7 +2425,7 @@ dropLevelsSeurat <- function(obj = combined.obj, verbose = TRUE, also.character #' drops levels in factor-like metadata. #' @param ls_obj A list of Seurat objects. #' @param object_names A character vector containing the names of the Seurat objects to process. -#' Default is names of all objects in the `ls_obj`. +#' Default: names of all objects in the `ls_obj`. #' @param indices A numeric vector indicating which datasets to process by their position in #' the `object_names` vector. By default, it processes the second and third datasets. #' @param ... Additional parameters passed to the `removeResidualSmallClusters` function. @@ -2442,12 +2465,12 @@ removeClustersAndDropLevels <- function(ls_obj, #' @description This function applies a cutoff in the specified dimension of a given #' dimension reduction (UMAP, PCA, or t-SNE) to remove cells. #' @param reduction A string specifying the dimension reduction technique to be used -#' ('umap', 'pca', or 'tsne'). Default is 'umap'. -#' @param umap_dim An integer specifying which dimension (axis) to apply the cutoff. Default is 1. -#' @param obj A Seurat object. Default is 'combined.obj'. -#' @param cutoff A numerical value indicating the cutoff value for the specified dimension. Default is 0. +#' ('umap', 'pca', or 'tsne'). Default: 'umap'. +#' @param umap_dim An integer specifying which dimension (axis) to apply the cutoff. Default: 1. +#' @param obj A Seurat object. Default: 'combined.obj'. +#' @param cutoff A numerical value indicating the cutoff value for the specified dimension. Default: 0. #' @param cut_below A logical value indicating whether to remove cells below (TRUE) or -#' above (FALSE) the cutoff line. Default is TRUE. +#' above (FALSE) the cutoff line. Default: `TRUE`. #' @param only_plot_cutoff Simulate and plot cutoff only. #' @param ... Any other parameters to be passed to internally called functions. #' @return A Seurat object with cells removed according to the specified cutoff. @@ -2514,7 +2537,7 @@ removeCellsByUmap <- function( #' @param ls.obj List of Seurat objects to be downsampled; Default: `ls.Seurat`. #' @param NrCells Target number of cells to downsample each Seurat object to. #' @param save_object Logical indicating whether to save the downsampled Seurat objects using `isaveRDS` -#' or to return them; Default: FALSE. +#' or to return them; Default: `FALSE`. #' #' @examples #' \dontrun{ @@ -2580,7 +2603,7 @@ downsampleListSeuObjsNCells <- function( #' @param ls.obj List of Seurat objects to be downsampled; Default: `ls.Seurat`. #' @param fraction Fraction of cells to retain in each Seurat object; Default: 0.1. #' @param save_object Logical indicating whether to save the downsampled Seurat objects using -#' `isaveRDS` or return them; Default: FALSE. +#' `isaveRDS` or return them; Default: `FALSE`. #' #' @examples #' \dontrun{ @@ -2777,11 +2800,12 @@ StoreAllMarkers <- function( GetTopMarkersDF <- function( dfDE = df.markers, n = p$"n.markers", order.by = c("avg_log2FC", "p_val_adj")[1], - exclude = c("^A[CFLP][0-9]{6}", "^Z[0-9]{5}", - "^LINC0[0-9]{4}", "^C[1-9]+orf[1-9]+", - "[-|\\.]AS[1-9]*$", "[-|\\.]DT[1-9]*$", - "^MIR[1-9]", "^SNHG[1-9]") - ) { + exclude = c( + "^A[CFLP][0-9]{6}", "^Z[0-9]{5}", + "^LINC0[0-9]{4}", "^C[1-9]+orf[1-9]+", + "[-|\\.]AS[1-9]*$", "[-|\\.]DT[1-9]*$", + "^MIR[1-9]", "^SNHG[1-9]" + )) { "Works on active Idents() -> thus we call cluster" combined_pattern <- paste(exclude, collapse = "|") @@ -2846,7 +2870,7 @@ GetTopMarkers <- function(dfDE = df.markers, #' @param obj A Seurat object, with default value `combined.obj`. #' @param group.by The clustering group to be used, defaults to the first entry by #' `GetClusteringRuns()`. -#' @param res Clustering resolution tag. Default is extracted from `group.by`. +#' @param res Clustering resolution tag. Default: extracted from `group.by`. #' @param plot.top.genes Logical indicating whether to show a plot, default is `TRUE`. #' @param suffix Suffix for the naming, defaults to the value of `res`. #' @param order.by Sorting criterion for the output tibble, defaults to the second element @@ -2873,13 +2897,14 @@ AutoLabelTop.logFC <- function( plot.top.genes = TRUE, suffix = res, order.by = c("combined.score", "avg_log2FC", "p_val_adj")[2], - exclude = c("^A[CFLP][0-9]{6}", "^Z[0-9]{5}", - "^LINC0[0-9]{4}", "^C[1-9]+orf[1-9]+", - "[-|\\.]AS[1-9]*$", "[-|\\.]DT[1-9]*$", - "^MIR[1-9]", "^SNHG[1-9]"), + exclude = c( + "^A[CFLP][0-9]{6}", "^Z[0-9]{5}", + "^LINC0[0-9]{4}", "^C[1-9]+orf[1-9]+", + "[-|\\.]AS[1-9]*$", "[-|\\.]DT[1-9]*$", + "^MIR[1-9]", "^SNHG[1-9]" + ), df_markers = obj@misc$"df.markers"[[paste0("res.", res)]], plotEnrichment = TRUE) { - message(group.by) message(" > Running AutoLabelTop.logFC...") @@ -2894,12 +2919,13 @@ AutoLabelTop.logFC <- function( if (plotEnrichment) { top_log2FC <- df.top.markers$"avg_log2FC" names(top_log2FC) <- ppp(df.top.markers$"cluster", df.top.markers$"gene") - ggExpress::qbarplot(top_log2FC, plotname = "The strongest fold change by cluster", - label = iround(top_log2FC), - subtitle = group.by, - ylab = "avg_log2FC", xlab = "clusters", - hline = 2, - suffix = group.by + ggExpress::qbarplot(top_log2FC, + plotname = "The strongest fold change by cluster", + label = iround(top_log2FC), + subtitle = group.by, + ylab = "avg_log2FC", xlab = "clusters", + hline = 2, + suffix = group.by ) } @@ -2946,17 +2972,18 @@ AutoLabelTop.logFC <- function( #' This function requires the output table of `FindAllMarkers()`. #' If you used `StoreAllMarkers()`, the output is stored under `@misc$df.markers$res...`, #' which is the default location. -#' @param obj A Seurat object to work with. Default: combined.obj. +#' @param obj A Seurat object to work with. Default: `combined.obj`. #' @param topN The top 'N' genes to consider. Default: 1. #' @param res The clustering resolution to use. Default: 0.5. #' @param KnownMarkers A character vector containing known marker genes to be used for annotation. -#' Default: c("TOP2A", "EOMES", "SLA", "HOPX", "S100B", "DLX6-AS1", "POU5F1", "SALL4", "DDIT4", -#' "PDK1", "SATB2", "FEZF2"). +#' Default: `c("TOP2A", "EOMES", "SLA", "HOPX", "S100B", "DLX6-AS1", "POU5F1", "SALL4", "DDIT4",` +#' `"PDK1", "SATB2", "FEZF2")`. #' @param order.by Specifies the column to sort the output tibble by. #' Default: 'combined.score' (First among "combined.score", "avg_log2FC", "p_val_adj"). #' @param df_markers The data frame of markers. By default, it is stored under #' `@misc$df.markers$res...` in the provided Seurat object. -#' Default: combined.obj@misc$df.markers[[paste0("res.", res)]]. +#' Default: `combined.obj@misc$df.markers[[paste0("res.", res)]]`. +#' #' @examples #' \dontrun{ #' if (interactive()) { @@ -2970,14 +2997,15 @@ AutoLabelTop.logFC <- function( #' @importFrom dplyr select slice AutoLabel.KnownMarkers <- function( obj = combined.obj, topN = 1, res = 0.5, - KnownMarkers = c( `dl-EN` = "KAZN", `ul-EN` = "SATB2", `Immature neurons` = "SLA" - , Interneurons = "DLX6-AS1", Interneurons = "ERBB4", InterN_CGE = "SCGN" - , `Intermediate progenitor` = "EOMES" - , `S-phase` = "TOP2A", `G2M-phase` = "H4C3" # formerly: HIST1H4C - , `oRG` = "HOPX" , Astrocyte = "S100B" - , `Hypoxia/Stress` = "DDIT4", Glycolytic = "PDK1" - , `Choroid.Plexus` = "TTR", `Low-Quality` = "POLR2A" - , `Mesenchyme` = "DCN", `Choroid.Plexus` = "TTR" + KnownMarkers = c( + `dl-EN` = "KAZN", `ul-EN` = "SATB2", `Immature neurons` = "SLA", + Interneurons = "DLX6-AS1", Interneurons = "ERBB4", InterN_CGE = "SCGN", + `Intermediate progenitor` = "EOMES", + `S-phase` = "TOP2A", `G2M-phase` = "H4C3" # formerly: HIST1H4C + , `oRG` = "HOPX", Astrocyte = "S100B", + `Hypoxia/Stress` = "DDIT4", Glycolytic = "PDK1", + `Choroid.Plexus` = "TTR", `Low-Quality` = "POLR2A", + `Mesenchyme` = "DCN", `Choroid.Plexus` = "TTR" ), order.by = c("combined.score", "avg_log2FC", "p_val_adj")[1], df_markers = obj@misc$"df.markers"[[paste0("res.", res)]]) { @@ -3081,11 +3109,11 @@ sparse.cor <- function(smat) { #' @description Calculate gene correlation on a Seurat object. #' @param assay.use The assay to use from the Seurat object. Default: 'RNA' #' @param slot.use The slot to use from the assay in the Seurat object. Default: 'data' -#' @param quantileX The quantile level for the calculation. Default: 0.95 -#' @param max.cells Maximum number of cells to be used in the calculation. Default: 40000 -#' @param seed The random seed used for the calculation. Default: p$seed -#' @param digits The number of decimal places to round the correlation and covariance values. Default: 2 -#' @param obj The Seurat object to perform calculations on. Default: combined.obj +#' @param quantileX The quantile level for the calculation. Default: `0.95` +#' @param max.cells Maximum number of cells to be used in the calculation. Default: `40000` +#' @param seed The random seed used for the calculation. Default: `p$seed` +#' @param digits The number of decimal places to round the correlation and covariance values. Default: `2` +#' @param obj The Seurat object to perform calculations on. Default: `combined.obj` #' @examples #' \dontrun{ #' if (interactive()) { @@ -3123,7 +3151,8 @@ Calc.Cor.Seurat <- function( genes.HE <- which_names(obj@misc[[quantile_name]] > 0) iprint("Pearson correlation is calculated for", length(genes.HE), "HE genes with expr.", qname, ": > 0.") tictoc::tic("sparse.cor") - ls.cor <- sparse.cor(smat = t(expr.mat[genes.HE, cells.use])); tictoc::toc() + ls.cor <- sparse.cor(smat = t(expr.mat[genes.HE, cells.use])) + tictoc::toc() ls.cor <- lapply(ls.cor, round, digits = 2) slot__name <- kpp(slot.use, assay.use, quantile_name) @@ -3141,15 +3170,15 @@ Calc.Cor.Seurat <- function( #' Useful for identifying groups of genes that exhibit similar expression patterns across different conditions #' or cell types in a Seurat object. #' -#' @param genes Vector of gene symbols to include in the correlation analysi. +#' @param genes Vector of gene symbols to include in the correlation analysis and heatmap. #' @param assay.use Assay from which to retrieve expression data within the Seurat object; Default: 'RNA'. -#' @param slot.use Specifies which slot of the assay to use for expression data ('data', 'scale.data', 'data.imputed'); -#' Default: first item ('data'). -#' @param quantileX Quantile level for calculating expression thresholds; Default: 0.95. -#' @param min.g.cor Minimum absolute gene correlation value for inclusion in the heatmap; Default: 0.3. -#' @param calc.COR Logical flag to calculate correlation matrix if not found in `@misc`; Default: FALSE. -#' @param cutRows Height at which to cut the dendrogram for rows, determining cluster formation; Default: NULL. -#' @param cutCols Height at which to cut the dendrogram for columns, determining cluster formation; +#' @param slot.use Specifies which slot of the assay to use for expression data `('data', 'scale.data', 'data.imputed')`; +#' Default: first item `('data')`. +#' @param quantileX Quantile level for calculating expression thresholds; Default: `0.95`. +#' @param min.g.cor Minimum absolute gene correlation value for inclusion in the heatmap; Default: `0.3`. +#' @param calc.COR Logical flag to calculate correlation matrix if not found in `@misc`; Default: `FALSE.` +#' @param cutRows Height at which to cut the dendrogram for rows, determining cluster formation; Default: `NULL.` +#' @param cutCols Height at which to cut the dendrogram for columns, determining cluster formation. #' Default: same as `cutRows`. #' @param obj Seurat object containing the data; Default: `combined.obj`. #' @param ... Additional parameters passed to the internally called functions. @@ -3486,7 +3515,6 @@ gene.name.check <- function(Seu.obj) { #' @seealso \code{\link[Seurat]{GetAssayData}}, \code{\link[DatabaseLinke.R]{qHGNC}} #' #' @export -#' @importFrom DatabaseLinke.R qHGNC #' @importFrom Seurat GetAssayData #' @importFrom Stringendo percentage_formatter #' @@ -3507,17 +3535,16 @@ check.genes <- function( missingGenes <- setdiff(list.of.genes, all_genes) if (length(missingGenes) > 0) { if (verbose) { - message( "\n", length(missingGenes), " or ", - Stringendo::percentage_formatter(length(missingGenes) / length(list.of.genes)), - " genes not found in the data, e.g: ", kppc(head(missingGenes, n = 10)) + message( + "\n", length(missingGenes), " or ", + Stringendo::percentage_formatter(length(missingGenes) / length(list.of.genes)), + " genes not found in the data, e.g: ", kppc(head(missingGenes, n = 10)) ) } + if (HGNC.lookup) { - if (exists("qHGNC", mode = "function")) { - try(DatabaseLinke.R::qHGNC(missingGenes, Open = F)) - } else { - warning("DatabaseLinke.R's qHGNC() function is needed, please install from github.", immediate. = TRUE) - } + stopifnot("Package 'DatabaseLinke.R' must be installed to use the 'HGNC.lookup' option." = require("DatabaseLinke.R")) + DatabaseLinke.R::qHGNC(missingGenes, Open = FALSE) } } tictoc::toc() @@ -3610,11 +3637,11 @@ CalculateFractionInTrome <- function( #' #' @description This function creates a new metadata column based on an existing metadata column #' and a list of mappings (name <- IDs). -#' @param obj A Seurat object for which the new annotation is to be created. Default is 'obj'. +#' @param obj A Seurat object for which the new annotation is to be created. Default: 'obj'. #' @param source A character string specifying the existing metadata column to be used as the -#' basis for the new annotation. Default is 'RNA_snn_res.0.5'. +#' basis for the new annotation. Default: 'RNA_snn_res.0.5'. #' @param named.list.of.identities A named list providing the mappings for the new annotation. -#' Default is 'ls.Subset.ClusterLists'. +#' Default: 'ls.Subset.ClusterLists'. #' @return A character vector representing the new metadata column. #' @examples #' \dontrun{ @@ -3709,10 +3736,10 @@ whitelist.subset.ls.Seurat <- function( #' #' @description Find correlated genes in a Seurat object #' @param gene Gene of interest. Default: 'TOP2A' -#' @param obj Seurat object to find the correlated genes from. Default: combined.obj +#' @param obj Seurat object to find the correlated genes from. Default: `combined.obj` #' @param assay Assay to be used from the Seurat object. Default: 'RNA' #' @param slot Slot to be used from the specified assay in the Seurat object. Default: 'data' -#' @param HEonly Logical, if TRUE, filters matrix to high-expressing genes only. Default: FALSE +#' @param HEonly Logical, if TRUE, filters matrix to high-expressing genes only. Default: `FALSE`. #' @param minExpr Minimum expression level for a gene to be considered. Default: 1 #' @param minCells Minimum number of cells expressing a gene for the gene to be considered. Default: 1000 #' @param trailingNgenes Number of top genes to consider based on their correlation. Default: 1000 @@ -3733,9 +3760,7 @@ whitelist.subset.ls.Seurat <- function( FindCorrelatedGenes <- function( gene = "TOP2A", obj = combined.obj, assay = "RNA", slot = "data", HEonly = FALSE, minExpr = 1, minCells = 1000, - trailingNgenes = 1000 - ) { - + trailingNgenes = 1000) { tictoc::tic("FindCorrelatedGenes") AssayData <- GetAssayData(object = obj, assay = assay, slot = slot) matrix_mod <- iround(as.matrix(AssayData)) @@ -3812,7 +3837,6 @@ UpdateGenesSeurat <- function(obj = ls.Seurat[[i]], species_ = "human", assay = EnforceUnique = TRUE, ShowStats = FALSE) { assays.present <- Assays(obj) for (assay in assays.present) { - message("Renaming in assay: ", assay, "...") all.genes <- Features(obj, assay = assay) @@ -3824,7 +3848,6 @@ UpdateGenesSeurat <- function(obj = ls.Seurat[[i]], species_ = "human", assay = } obj <- RenameGenesSeurat(obj, newnames = HGNC.updated$"Suggested.Symbol", assay = assay) - } return(obj) } @@ -4233,18 +4256,18 @@ PlotUpdateStats <- function(mat = UpdateStatMat, column.names = c("Updated (%)", #' (2) converts them to Seurat objects, and (3) saves them as .qs files #' #' @param InputDir A character string specifying the input directory. -#' @param regex A logical value. If TRUE, the folderPattern is treated as a regular expression. Default is FALSE. -#' @param folderPattern A character vector specifying the pattern of folder names to be searched. Default is 'filtered_feature'. +#' @param regex A logical value. If TRUE, the folderPattern is treated as a regular expression. Default: `FALSE`. +#' @param folderPattern A character vector specifying the pattern of folder names to be searched. Default: 'filtered_feature'. #' @param suffix A character string specifying the suffix of the files saved. -#' @param min.cells An integer value specifying the minimum number of cells. Default is 5. -#' @param min.features An integer value specifying the minimum number of features. Default is 200. -#' @param updateHGNC A logical value indicating whether to update the HGNC. Default is TRUE. -#' @param save Save .qs object? Default: TRUE. -#' @param ShowStats A logical value indicating whether to show statistics. Default is TRUE. -#' @param writeCBCtable A logical value indicating whether to write out a list of cell barcodes (CBC) as a tsv file. Default is TRUE. -#' @param depth An integer value specifying the depth of scan (i.e., how many levels below the InputDir). Default is 2. -#' @param sort_alphanumeric sort files alphanumeric? Default: TRUE. -#' @param save_empty_droplets save empty droplets? Default: TRUE. +#' @param min.cells An integer value specifying the minimum number of cells. Default: 5. +#' @param min.features An integer value specifying the minimum number of features. Default: 200. +#' @param updateHGNC A logical value indicating whether to update the HGNC. Default: `TRUE`. +#' @param save Save .qs object? Default: `TRUE`. +#' @param ShowStats A logical value indicating whether to show statistics. Default: `TRUE`. +#' @param writeCBCtable A logical value indicating whether to write out a list of cell barcodes (CBC) as a tsv file. Default: `TRUE`. +#' @param depth An integer value specifying the depth of scan (i.e., how many levels below the InputDir). Default: 2. +#' @param sort_alphanumeric sort files alphanumeric? Default: `TRUE`. +#' @param save_empty_droplets save empty droplets? Default: `TRUE`. #' #' @examples #' \dontrun{ @@ -4266,7 +4289,6 @@ Convert10Xfolders <- function( sort_alphanumeric = TRUE, save_empty_droplets = TRUE, ...) { - stopifnot( is.character(InputDir), dir.exists(InputDir), is.logical(regex), is.character(folderPattern), is.character(suffix), is.numeric(depth), @@ -4289,17 +4311,19 @@ Convert10Xfolders <- function( for (i in 1:length(fin)) { print(i) - pathIN = Stringendo::FixPath(fin[i]); message(pathIN) - fnameIN = basename(dirname(dirname(pathIN))); message(fnameIN) + pathIN <- Stringendo::FixPath(fin[i]) + message(pathIN) + fnameIN <- basename(dirname(dirname(pathIN))) + message(fnameIN) - count_matrix <- Read10X(pathIN ) + count_matrix <- Read10X(pathIN) if (!is.list(count_matrix) | length(count_matrix) == 1) { seu <- CreateSeuratObject( counts = count_matrix, project = fnameIN, min.cells = min.cells, min.features = min.features ) } else { - ( stop('length(count_matrix) != 1') ) + (stop("length(count_matrix) != 1")) } ncells <- ncol(seu) @@ -4325,7 +4349,7 @@ Convert10Xfolders <- function( ReadWriter::write.simple.tsv(input_df = CBCs, manual_file_name = sppp(fnameIN, suffix, "CBC"), manual_directory = InputDir) } - if(save_empty_droplets & suffix == "raw") { + if (save_empty_droplets & suffix == "raw") { # Select and save empty droplets (the Soup) path_filtered <- gsub(x = pathIN, pattern = "/raw_feature_", replacement = "/filtered_feature_") @@ -4341,8 +4365,8 @@ Convert10Xfolders <- function( CBC_empty_drops <- setdiff(colnames(seu), CBCs_HQ) nr.empty.droplets <- length(CBC_empty_drops) umi_per_CBC <- colSums(seu@assays$RNA@layers$counts) - pct.empty.droplets.max10umis <- pc_TRUE(umi_per_CBC<11) - message("We have ", nr.empty.droplets, " empty droplets, ", pct.empty.droplets.max10umis, " of which have max 10 umis." ) + pct.empty.droplets.max10umis <- pc_TRUE(umi_per_CBC < 11) + message("We have ", nr.empty.droplets, " empty droplets, ", pct.empty.droplets.max10umis, " of which have max 10 umis.") FNM <- sppp("nr.empty.droplets", fnameIN, nr.empty.droplets) ReadWriter::write.simple.vec(nr.empty.droplets, manual_file_name = FNM, manual_directory = SoupDir) @@ -4353,17 +4377,13 @@ Convert10Xfolders <- function( # save the bulk RNA counts of the empty droplets Soup.Bulk.RNA <- rowSums(count_matrix[, CBC_empty_drops]) - f_path_out_Bulk <- Stringendo::ParseFullFilePath(path = SoupDir, file_name = sppp("Soup.Bulk.RNA", fnameIN), extension = 'qs') + f_path_out_Bulk <- Stringendo::ParseFullFilePath(path = SoupDir, file_name = sppp("Soup.Bulk.RNA", fnameIN), extension = "qs") qs::qsave(x = Soup.Bulk.RNA, file = f_path_out_Bulk, nthreads = nthreads, preset = preset) ReadWriter::write.simple.tsv(Soup.Bulk.RNA, suffix = fnameIN, manual_directory = SoupDir) - } } else { message("No empty droplets saved. suffix ", suffix) } - - - } # for } @@ -4376,16 +4396,16 @@ Convert10Xfolders <- function( #' containing the standard output of 10X Cell Ranger. It (1) loads the filtered data matrices, #' (2) converts them to Seurat objects, and (3) saves them as .RDS files. #' @param InputDir A character string specifying the input directory. -#' @param folderPattern A character string specifying the pattern of folder names to be searched. Default is 'SRR*'. -#' @param filePattern A character string specifying the pattern of file names to be searched. Default is 'expression.tsv.gz'. -#' @param useVroom A logical value indicating whether to use vroom. Default is TRUE. -#' @param col_types.vroom A list defining column types for vroom. Default is list("GENE" = "c", .default = "d"). -#' @param min.cells An integer value specifying the minimum number of cells. Default is 10. -#' @param min.features An integer value specifying the minimum number of features. Default is 200. -#' @param updateHGNC A logical value indicating whether to update the HGNC. Default is TRUE. -#' @param ShowStats A logical value indicating whether to show statistics. Default is TRUE. -#' @param minDimension An integer value specifying the minimum dimension. Default is 10. -#' @param overwrite A logical value indicating whether to overwrite files. Default is FALSE. +#' @param folderPattern A character string specifying the pattern of folder names to be searched. Default: 'SRR*'. +#' @param filePattern A character string specifying the pattern of file names to be searched. Default: 'expression.tsv.gz'. +#' @param useVroom A logical value indicating whether to use vroom. Default: `TRUE`. +#' @param col_types.vroom A list defining column types for vroom. Default: list("GENE" = "c", .default = "d"). +#' @param min.cells An integer value specifying the minimum number of cells. Default: 10. +#' @param min.features An integer value specifying the minimum number of features. Default: 200. +#' @param updateHGNC A logical value indicating whether to update the HGNC. Default: `TRUE`. +#' @param ShowStats A logical value indicating whether to show statistics. Default: `TRUE`. +#' @param minDimension An integer value specifying the minimum dimension. Default: 10. +#' @param overwrite A logical value indicating whether to overwrite files. Default: `FALSE`. #' @examples #' \dontrun{ #' if (interactive()) { @@ -4395,9 +4415,9 @@ Convert10Xfolders <- function( #' @seealso #' \code{\link[vroom]{vroom}} #' \code{\link[readr]{read_delim}} -#' @export -#' @importFrom vroom vroom #' @importFrom readr read_tsv +#' +#' @export ConvertDropSeqfolders <- function( InputDir, folderPattern = "SRR*", filePattern = "expression.tsv.gz", @@ -4427,6 +4447,7 @@ ConvertDropSeqfolders <- function( CountTable <- list.files(subdir, pattern = filePattern, recursive = FALSE) stopifnot(length(CountTable) == 1) count_matrix <- if (useVroom) { + stopifnot("Package 'vroom' must be installed to use this function." = require("vroom")) vroom::vroom(file = kpps(subdir, CountTable), col_types = col_types.vroom) } else { readr::read_tsv(file = kpps(subdir, CountTable)) @@ -4460,13 +4481,13 @@ ConvertDropSeqfolders <- function( #' symbolic links (but not with aliases). #' @param InputDir A character string specifying the input directory. #' @param file.pattern A character string specifying the pattern of file names to be searched. -#' Default is '^filtered.+Rds$'. +#' Default: '^filtered.+Rds$'. #' @param string.remove1 A character string or FALSE. If a string is provided, it is removed from -#' file names. Default is "filtered_feature_bc_matrix.". +#' file names. Default: "filtered_feature_bc_matrix.". #' @param string.replace1 A character string of the new text instead of "string.remove1". #' @param string.remove2 A character string or FALSE. If a string is provided, it is removed from -#' file names. Default is ".min.cells.10.min.features.200.Rds". -#' @param sort_alphanumeric sort files alphanumeric? Default: TRUE. +#' file names. Default: ".min.cells.10.min.features.200.Rds". +#' @param sort_alphanumeric sort files alphanumeric? Default: `TRUE`. #' @examples #' \dontrun{ #' if (interactive()) { @@ -4481,8 +4502,7 @@ LoadAllSeurats <- function( string.remove1 = list(FALSE, "filtered_feature_bc_matrix.", "raw_feature_bc_matrix.")[[2]], string.replace1 = "", string.remove2 = list(FALSE, ".min.cells.10.min.features.200.Rds")[[2]], - sort_alphanumeric = TRUE - ) { + sort_alphanumeric = TRUE) { tictoc::tic("LoadAllSeurats") InputDir <- FixPath(InputDir) @@ -4581,8 +4601,8 @@ read10x <- function(dir) { #' #' @description Save and RDS object and compress resulting file in the background using system(gzip). OS X or unix. #' @param obj Seurat object. -#' @param compress_internally Compress by R? Default: FALSE (still compressed in background via CLI). -#' @param compr Compress at all? Default: TRUE +#' @param compress_internally Compress by R? Default: `FALSE`. (still compressed in background via CLI). +#' @param compr Compress at all? Default: `TRUE`. #' @param fname File name #' @param ... Additional parameters passed to saveRDS() function. #' @seealso @@ -4605,21 +4625,21 @@ read10x <- function(dir) { #' #' @description Save an RDS object, using a faster and efficient compression method that runs in the background. #' @param obj The object to be saved, typically a Seurat object. -#' @param prefix A string prefix added to the filename. Default is NULL. -#' @param suffix A string suffix added to the filename. Default is NULL. +#' @param prefix A string prefix added to the filename. Default: NULL. +#' @param suffix A string suffix added to the filename. Default: NULL. #' @param inOutDir A boolean flag, if TRUE the OutDir is used as save directory, if FALSE the -#' alternative_path_rdata is used. Default is TRUE +#' alternative_path_rdata is used. Default: `TRUE`. #' @param project A string representing the project code. This is appended to the saved file name. -#' Default is the active project determined by getProject(). +#' Default: the active project determined by getProject(). #' @param alternative_path_rdata A string that specifies the alternative path for storing the -#' RDS file if inOutDir is FALSE. Default is "~/Dropbox (VBC)/Abel.IMBA/AnalysisD/_RDS.files/" +#' RDS file if inOutDir is FALSE. Default: "~/Dropbox (VBC)/Abel.IMBA/AnalysisD/_RDS.files/" #' appended with the basename of OutDir. -#' @param homepath A string representing the homepath. Will be replaced by '~' in the file path. Default is '~/'. +#' @param homepath A string representing the homepath. Will be replaced by '~' in the file path. Default: '~/'. #' @param showMemObject A boolean flag, if TRUE the function will print out the memory size of the -#' largest objects in the workspace. Default is TRUE. +#' largest objects in the workspace. Default: `TRUE`. #' @param saveParams A boolean flag, if TRUE the parameters 'p' and 'all.genes' are added to the -#' 'misc' slot of the Seurat object if the object is of class Seurat. Default is TRUE. -#' @param compress Compress .Rds file after writing? Default is TRUE. +#' 'misc' slot of the Seurat object if the object is of class Seurat. Default: `TRUE`. +#' @param compress Compress .Rds file after writing? Default: `TRUE`. #' @param test_read Provide command to test validity by reading in the object just written. #' @examples #' \dontrun{ @@ -4690,7 +4710,6 @@ isave.RDS <- function( #' @seealso \code{\link[qs]{qsave}} for the underlying save function used. #' @importFrom qs qsave #' @importFrom tictoc tic toc -#' @importFrom job job #' @importFrom rstudioapi isAvailable #' #' @export @@ -4722,8 +4741,8 @@ xsave <- function( annot.suffix <- if (is.list(obj)) kppd("ls", length(obj)) else NULL } - if(!isFALSE(saveParams)) message("paramList: ", if (exists("paramList")) paste(substitute(paramList), length(paramList), " elements.") else " not provided.") - if(!isFALSE(saveParams)) message("allGenes: ", if (exists("allGenes")) " found as global variable." else " not provided.") + if (!isFALSE(saveParams)) message("paramList: ", if (exists("paramList")) paste(substitute(paramList), length(paramList), " elements.") else " not provided.") + if (!isFALSE(saveParams)) message("allGenes: ", if (exists("allGenes")) " found as global variable." else " not provided.") try(tictoc::tic("xsave"), silent = TRUE) if (showMemObject & v) try(memory.biggest.objects(), silent = TRUE) @@ -4820,10 +4839,10 @@ xread <- function(file, if (set_m) { # if (!exists("m")) { - # m <- list.fromNames(colnames(obj@meta.data)) - m <- lapply(data.frame(obj@meta.data), function(x) head(unique(x), 50)) - assign("m", m, envir = .GlobalEnv) - message("Variable 'm', a list of @meta.data colnames and first 50 uq values, is now defined in the global environment.") + # m <- list.fromNames(colnames(obj@meta.data)) + m <- lapply(data.frame(obj@meta.data), function(x) head(unique(x), 50)) + assign("m", m, envir = .GlobalEnv) + message("Variable 'm', a list of @meta.data colnames and first 50 uq values, is now defined in the global environment.") # } else { # message("Variable 'm' already exists in the global environment, not overwritten") # } # exists("m") @@ -4849,10 +4868,10 @@ xread <- function(file, #' method that runs in the background. #' @param ... Additional parameters passed to the idate() function in the creation of the file name. #' @param path_rdata A string that specifies the path for storing the image of the workspace. -#' Default is "~/Dropbox/Abel.IMBA/AnalysisD/_Rdata.files/" appended with the basename of OutDir. +#' Default: "~/Dropbox/Abel.IMBA/AnalysisD/_Rdata.files/" appended with the basename of OutDir. #' @param showMemObject A boolean flag, if TRUE the function will print out the memory size of the -#' largest objects in the workspace. Default is TRUE. -#' @param options A string for gzip options. Default is "--force". +#' largest objects in the workspace. Default: `TRUE`. +#' @param options A string for gzip options. Default: "--force". #' @examples #' \dontrun{ #' if (interactive()) { @@ -4884,14 +4903,17 @@ isave.image <- function( #' @description Faster saving of workspace, and compression outside R, when it can run in the background. #' Seemingly quite CPU hungry and not very efficient compression. # #' @param ... Pass any other parameter to the internally called functions (most of them should work). -#' @param options Options passed on to gzip, via CLI. Default: c("--force", NULL)[1] +#' @param showMemObject Logical; if TRUE, the function will print out the memory size of the largest +#' objects in the workspace. Default: `TRUE`. +#' @param options Options passed on to gzip, via CLI. Default: `c("--force", NULL)[1]` #' @seealso #' \code{\link[Stringendo]{kollapse}}, \code{\link[function]{iprint}} #' @export #' @importFrom Stringendo kollapse iprint #' @importFrom tictoc tic toc -qsave.image <- function(..., showMemObject = TRUE, options = c("--force", NULL)[1] - ) { +qsave.image <- function( + ..., showMemObject = TRUE, + options = c("--force", NULL)[1]) { tictoc::tic("qsave.image") fname <- Stringendo::kollapse(getwd(), "/", basename(OutDir), idate(), ..., ".Rdata") @@ -4964,7 +4986,7 @@ clip10Xcellname <- function(cellnames) { #' @description Appends a specified suffix to cell names to mimic lane suffixes used in 10X datasets. #' #' @param cellnames A vector of cell names without numeric suffixes. -#' @param suffix The suffix to add to each cell name. Default is '_1'. +#' @param suffix The suffix to add to each cell name. Default: '_1'. #' @return A vector of cell names with the specified suffix appended. #' @examples #' cellnames <- c("cell1", "cell2") @@ -4993,8 +5015,9 @@ make10Xcellname <- function(cellnames, suffix = "_1") { #' @param CellRanger_outs_Dir CellRanger 'outs' (output) directory, Default: '~/Data/114593/114593' #' @param library_name Aka SampleName (the folder above 'outs;). #' @param out_dir_prefix Prefix for the output directory. Default: 'SoupStatistics' -#' @param add_custom_class Add a custom class of genes, matched by apattern in gene symbol. Default: TRUE -#' @param pattern_custom The pattern to match in gene symbol. Default: NA +#' @param add_custom_class Add a custom class of genes, matched by apattern in gene symbol. Default: `TRUE`. +#' @param pattern_custom The pattern to match in gene symbol. Default: `NA`. +#' @param ls.Alpha The alpha value for the label text. Default: 0.5. #' #' @seealso #' \code{\link[Matrix]{colSums}} @@ -5012,11 +5035,10 @@ make10Xcellname <- function(cellnames, suffix = "_1") { #' @export plotTheSoup <- function(CellRanger_outs_Dir = "~/Data/114593/114593", # library_name = str_extract(CellRanger_outs_Dir, "[[:alnum:]_]+(?=/outs/)"), - library_name = basename(gsub("/outs","",CellRanger_outs_Dir)), - out_dir_prefix = 'SoupStatistics', - add_custom_class = F, pattern_custom = "\\.RabV$", + library_name = basename(gsub("/outs", "", CellRanger_outs_Dir)), + out_dir_prefix = "SoupStatistics", + add_custom_class = FALSE, pattern_custom = "\\.RabV$", ls.Alpha = 1) { - iprint("library_name:", library_name) stopifnot( # Check input @@ -5025,7 +5047,7 @@ plotTheSoup <- function(CellRanger_outs_Dir = "~/Data/114593/114593", is.numeric(ls.Alpha) ) - if(add_custom_class) iprint("pattern_custom:", pattern_custom) + if (add_custom_class) iprint("pattern_custom:", pattern_custom) # The regular expression `[[:alnum:]_]+(?=/outs/)` matches one or more alphanumeric characters or # underscores that are followed by the `/outs/` portion in the string. It ensures that the desired @@ -5113,7 +5135,7 @@ plotTheSoup <- function(CellRanger_outs_Dir = "~/Data/114593/114593", Class[grep("^LINC", HGNC)] <- "LINC" Class[grep("^AC", HGNC)] <- "AC" Class[grep("^AL", HGNC)] <- "AL" - if (add_custom_class) Class[grep(pattern_custom, HGNC)] <- ReplaceSpecialCharacters(pattern_custom, remove_dots = T) + if (add_custom_class) Class[grep(pattern_custom, HGNC)] <- ReplaceSpecialCharacters(pattern_custom, remove_dots = TRUE) Nr.of.Genes.per.Class <- table(Class) @@ -5174,8 +5196,6 @@ plotTheSoup <- function(CellRanger_outs_Dir = "~/Data/114593/114593", as.character(gene), "" ))) ggsave(pgg, filename = file.path(OutDir, fname), width = 7, height = 7) - - } # for @@ -5456,7 +5476,7 @@ jPairwiseJaccardIndex <- function(binary.presence.matrix = df.presence) { #' @param obj1 The first Seurat object for comparison. Default: NULL. #' @param obj2 The second Seurat object for comparison. Default: NULL. #' @param cor.plot An optional boolean indicating whether to generate a scatterplot of the ranks -#' of common genes. Default: FALSE. +#' of common genes. Default: `FALSE`. #' @param plot_venn plot_venn #' @param suffix suffix #' @param save.plot save.plot @@ -5584,7 +5604,7 @@ compareVarFeaturesAndRanks <- function( # n_cores_detected <- as.numeric(system("nproc", intern = TRUE)) n_cores_detected <- as.numeric(system("echo $SLURM_CPUS_PER_TASK", intern = TRUE)) n_cores_avail <- min(n_cores_detected, n.cpus.def) - return(max(n_cores_avail, 1) ) + return(max(n_cores_avail, 1)) } @@ -5612,7 +5632,7 @@ compareVarFeaturesAndRanks <- function( if (is.null(param_list[[element]])) { warning(sprintf("`%s` is not defined", element), immediate. = TRUE, call. = FALSE) } else { - message(sprintf("`%s` is: %s", element, param_list[[element]]) ) + message(sprintf("`%s` is: %s", element, param_list[[element]])) } }, USE.NAMES = FALSE) @@ -5661,10 +5681,10 @@ compareVarFeaturesAndRanks <- function( #' @return Integer representing the number of principal components #' .getNrPCs <- function(obj, v = TRUE, reduc = "pca") { - if("pca" %in% names(obj@reductions)) { + if ("pca" %in% names(obj@reductions)) { ncol(obj@reductions[[reduc]]@"cell.embeddings") } else { - if(v) warning("No PCA cell embeddings found in object.", immediate. = TRUE) + if (v) warning("No PCA cell embeddings found in object.", immediate. = TRUE) NA } } @@ -5700,7 +5720,7 @@ compareVarFeaturesAndRanks <- function( } # Extract regression variables - regressionVariables <- func_slot$'vars.to.regress' + regressionVariables <- func_slot$"vars.to.regress" if (is.null(regressionVariables)) { if (v) message("No regression variables found in @commands") } else { @@ -5722,7 +5742,7 @@ compareVarFeaturesAndRanks <- function( #' @param regressionVariables A list or vector containing variables for regression. Default: NULL. #' If NULL, the function will attempt to extract the variables from the `object@commands$ScaleData`. #' @param nrVarFeatures You can provide this number manually. Default: NULL. -#' @param return.as.name If TRUE, returns the name of the object. Default: FALSE. +#' @param return.as.name If TRUE, returns the name of the object. Default: `FALSE`. #' @param assay The assay to extract scaled features from. Default: "RNA". #' @param suffix A suffix string to add. #' @param v Verbose? Default: `TRUE`. @@ -5734,15 +5754,13 @@ compareVarFeaturesAndRanks <- function( return.as.name = FALSE, assay = Seurat::DefaultAssay(obj), suffix = NULL, - v = T - ) { + v = TRUE) { + tictoc::tic(".parseKeyParams") - tictoc::tic('.parseKeyParams') + if (v) message(" > Running .parseKeyParams...") + scaledFeatures <- .getNrScaledFeatures(obj, assay, v = FALSE) - if(v) message(" > Running .parseKeyParams...") - scaledFeatures <- .getNrScaledFeatures(obj, assay, v= F) - - if (is.null(regressionVariables)) regressionVariables <- .getRegressionVariablesForScaleData(obj = obj, assay = assay, v = F) + if (is.null(regressionVariables)) regressionVariables <- .getRegressionVariablesForScaleData(obj = obj, assay = assay, v = FALSE) if (!is.null(nrVarFeatures)) { if (nrVarFeatures != scaledFeatures) { @@ -5786,7 +5804,6 @@ compareVarFeaturesAndRanks <- function( #' @importFrom checkmate assert_class assert_character assert_string .FindCommandInObject <- function(obj, pattern, perl = TRUE) { - command_names <- names(obj@commands) # Get all command names # Find matches using partial pattern matching @@ -5847,4 +5864,3 @@ compareVarFeaturesAndRanks <- function( # _________________________________________________________________________________________________ # Temp _____________________________ ------ # _________________________________________________________________________________________________ - diff --git a/R/Seurat.Utils.R.bac b/R/Seurat.Utils.R.bac new file mode 100644 index 0000000..9b8c603 --- /dev/null +++ b/R/Seurat.Utils.R.bac @@ -0,0 +1,5859 @@ +# ____________________________________________________________________ +# Seurat.utils ---- +# ____________________________________________________________________ +# file.edit("~/GitHub/Packages/Seurat.utils/R/Seurat.Utils.R") +# source("~/GitHub/Packages/Seurat.utils/R/Seurat.Utils.Metadata.R") +# source("~/GitHub/Packages/Seurat.utils/R/Seurat.Utils.Visualization.R") +# source("~/GitHub/Packages/Seurat.utils/R/Seurat.utils.less.used.R") + + +# devtools::check_man("~/GitHub/Packages/Seurat.utils") +# devtools::load_all("~/GitHub/Packages/Seurat.utils") +# devtools::document("~/GitHub/Packages/Seurat.utils"); devtools::load_all("~/GitHub/Packages/Seurat.utils") + +# file.remove("~/GitHub/Packages/Seurat.utils/weight3.bar.png") + + + +# _________________________________________________________________________________________________ +# One-stop-shop functions for entire pipelines _____________________________ ------ +# _________________________________________________________________________________________________ + +#' @title Process Seurat Objects in Parallel +#' +#' @description Applies a series of Seurat processing steps to each Seurat object in a list. +#' The operations include scaling data, running PCA, UMAP, finding neighbors, and finding clusters. +#' This is done in parallel using multiple cores. +#' +#' @param obj A Seurat object to be processed. +#' @param param.list A list of parameters used in the processing steps. +#' @param add.meta.fractions A boolean indicating whether to add meta data for fractions of cells in each cluster. Default: `FALSE`.. +#' @param compute A boolean indicating whether to compute the results. Default: `TRUE`.. +#' @param save A boolean indicating whether to save the results. Default: `TRUE`.. +#' @param plot A boolean indicating whether to plot the results. Default: `TRUE`.. +#' @param nfeatures The number of variable genes to use. Default: 2000. +#' @param variables.2.regress A list of variables to regress out. Default: NULL. +#' @param n.PC The number of principal components to use. Default: 30. +#' @param resolutions A list of resolutions to use for clustering. Default: c(0.1, 0.2, 0.3, 0.4, 0.5). +#' @param reduction_input The reduction method to use as input for clustering & UMAP. Default: "pca". +#' @param WorkingDir The working directory to save the results. Default: getwd(). +#' @param ... Additional parameters to be passed to `ScaleData()`. +#' +#' @return A Seurat object after applying scaling, PCA, UMAP, neighbor finding, and clustering. +#' +#' @examples +#' # Assuming ls.Seurat is a list of Seurat objects and params is a list of parameters +#' # results <- mclapply(ls.Seurat, processSeuratObject, params, mc.cores = 4) +#' +#' @importFrom Seurat ScaleData RunPCA RunUMAP FindNeighbors FindClusters +#' @importFrom tictoc tic toc +#' @importFrom harmony RunHarmony +#' +#' @export +processSeuratObject <- function(obj, param.list = p, add.meta.fractions = FALSE, + compute = TRUE, + save = TRUE, plot = TRUE, + nfeatures = param.list$"n.var.genes", + variables.2.regress = param.list$"variables.2.regress.combined", + n.PC = param.list$"n.PC", + resolutions = param.list$"snn_res", + reduction_input = "pca", + WorkingDir = getwd(), + ...) { + # + warning("Make sure you cleaned up the memory!", immediate. = TRUE) + message("\nWorkingDir: ", WorkingDir) + if (reduction_input == "harmony") message("Harmony integration is attempted, but it is experimental.") + stopifnot(require(tictoc)) + + tictoc::tic("processSeuratObject") + + # Assertions to check input types _________________________________________________ + stopifnot( + "Seurat" %in% class(obj), + is.list(param.list), + all(c("n.PC", "snn_res") %in% names(param.list)), + is.numeric(n.PC), is.numeric(resolutions), + is.character(variables.2.regress) | is.null(variables.2.regress), + "variables.2.regress is not found in @meta" = variables.2.regress %in% colnames(obj@meta.data) + ) + + iprint("nfeatures:", nfeatures) + iprint("n.PC:", n.PC) + iprint("snn_res:", resolutions) + iprint("variables.2.regress (combined):", variables.2.regress) + + # Save parameters _________________________________________________ + param.list$"n.var.genes" <- nfeatures + param.list$"variables.2.regress.combined" <- variables.2.regress + param.list$"n.PC" <- n.PC + param.list$"snn_res" <- resolutions + + # .checkListElements(param_list = param.list, elements = c("variables.2.regress.combined", "n.PC", "snn_res")) + + obj@misc$"p" <- param.list # overwrite previous parameters + + gc() + + if (add.meta.fractions) { + message("Adding meta data for gene-class fractions, eg. percent.mito, etc.") + obj <- addGeneClassFractions(obj) + } # end if add.meta.fractions + + if (compute) { + message("------------------- FindVariableFeatures -------------------") + tic("FindVariableFeatures") + obj <- FindVariableFeatures(obj, mean.function = "FastExpMean", + dispersion.function = "FastLogVMR", nfeatures = nfeatures); toc() + + tic('calc.q99.Expression.and.set.all.genes') + obj <- calc.q99.Expression.and.set.all.genes(obj = obj, quantileX = .99); toc() + + message("------------------- ScaleData -------------------") + tic('ScaleData') + obj <- ScaleData(obj, assay = "RNA", verbose = TRUE, vars.to.regress = variables.2.regress, ...); toc() + + + if (reduction_input == "harmony") { + message("------------------- Harmony - EXPERIMENTAL -------------------") + + m.REGR <- obj@meta.data[, variables.2.regress, drop=F] + any_regr_col_numeric <- sapply(m.REGR, is.numeric) + if(any(any_regr_col_numeric)){ + print("Some of the regression variables are numeric:") + print(any_regr_col_numeric) + stop("harmony cannot regress numeric variables") + } + + obj$'regress_out' <- apply(m.REGR,1, kppu) + if (nr.unique(obj$'regress_out') > 25) { + warning("The number of regress_out categories is too many (>25), consider serially calling harmony on each variable.", immediate. = TRUE) + } + if (min(table(obj$'regress_out')) < 5) { + warning("The number of cells in some regress_out categories is too few (<5), consider serially calling harmony on each variable.", immediate. = TRUE) + } + + nr_new_layers <- nr.unique(combined.obj$'regress_out') + nr_existing_layers <- (length(Layers(combined.obj))-1)/2 + if( nr_existing_layers != nr_new_layers) { + tic("Split layers by regress_out") + combined.obj[["RNA"]] <- split(combined.obj[["RNA"]], f = combined.obj$'regress_out'); toc() + } + + tic("RunHarmony") + obj <- harmony::RunHarmony(object = obj, group.by.vars = "regress_out", dims.use = 1:nPCs, plot_convergence = F); toc() + + tic("JoinLayers") + obj <- JoinLayers(obj, assay = "RNA"); toc() + obj@misc$'harmony.params' <- c( "nPCs" = nPCs, "regress" = variables.2.regress) + + } + + + + message("------------------- PCA /UMAP -------------------") + tic("PCA") + obj <- RunPCA(obj, npcs = n.PC, verbose = TRUE); toc() + tic("UMAP") + obj <- SetupReductionsNtoKdimensions(obj, nPCs = n.PC, reduction_output = "umap", + reduction_input = reduction_input, dimensions = 3:2); toc() + + message("------------------- FindNeighbors & Clusters -------------------") + tic("FindNeighbors") + obj <- FindNeighbors(obj, reduction = reduction_input, dims = 1:n.PC); toc() + + tic("FindClusters") + obj <- FindClusters(obj, resolution = resolutions); toc() + } + + + if (save) { + message("------------------- Saving -------------------") + create_set_OutDir(WorkingDir) + xsave(obj, suffix = "reprocessed", paramList = param.list) + } + + if (plot) { + message("------------------- Plotting -------------------") + + try(suPlotVariableFeatures(obj = obj, assay = "RNA"), silent = TRUE) + + try(scPlotPCAvarExplained(obj), silent = TRUE) + + try(qQC.plots.BrainOrg(obj = obj), silent = TRUE) + + # multi_clUMAP.A4(obj = obj) + + # res.ident <- paste0(DefaultAssay(obj), "_snn_res.", resolutions)[1:4] + try(qClusteringUMAPS(obj = obj), silent = TRUE) # , idents = res.ident + + if (ncol(obj) < 50000) try(qMarkerCheck.BrainOrg(obj = obj), silent = TRUE) + + + Signature.Genes.Top20 <- c( + `dl-EN` = "KAZN", `ul-EN` = "SATB2" # dl-EN = deep layer excitatory neuron + , `Immature neurons` = "SLA", Interneurons = "DLX6-AS1", + Interneurons = "ERBB4", Interneurons = "SCGN", + `Intermediate progenitor` = "EOMES" # , `Intermediate progenitor1` = "TAC3" + , `S-phase` = "TOP2A", `G2M-phase` = "H4C3" # formerly: HIST1H4C + , `oRG` = "HOPX", `oRG` = "ID4" # oRG outer radial glia + , Astroglia = "GFAP", + Astrocyte = "S100B", `Hypoxia/Stress` = "DDIT4", + `Choroid.Plexus` = "TTR", `Low-Quality` = "POLR2A", + `Mesenchyme` = "DCN", Glycolytic = "PDK1", + `Choroid.Plexus` = "OTX2", `Mesenchyme` = "DCN" + ) + try(plotQUMAPsInAFolder(genes = Signature.Genes.Top20, obj = obj), silent = TRUE) + } + toc() + + return(obj) +} + +# _________________________________________________________________________________________________ +#' @title Run Differential Gene Expression Analysis (DGEA) +#' +#' @description Runs a differential gene expression analysis based on specified parameters, +#' reorders clusters if needed, and optionally saves results. Supports output and plotting configurations. +#' +#' @param obj Seurat object, assumed to be pre-configured with necessary data. +#' @param param.list List of parameters for DE analysis. Default: p. +#' @param res.analyzed.DE Vector of numeric values specifying the resolutions to analyze. +#' Default: c(0.1). +#' @param ident Use this to specify a non-standard cluster identity, such as named clusters. +#' `runDGEA` will use this ident explicitly for the DE analysis. Default: NULL. +#' @param reorder.clusters Logical indicating whether to reorder clusters based on dimension. +#' Default: `TRUE`.. +#' @param reorder.dimension Integer specifying the dimension for reordering (1 for x, -1 for y). +#' Default: 1. +#' @param add.combined.score Logical indicating whether to add a combined score to the markers. +#' Default: `TRUE`.. +#' @param save.obj Logical indicating whether to save the modified Seurat object. +#' Default: `TRUE`.. +#' @param directory Character string specifying the base directory for saving results. +#' Default: OutDir +#' @param dir_suffix Character string specifying the suffix for the subdirecotry directory. +#' @param subdirectory Character string specifying the subdirectory for saving outputs within +#' the base directory. Default: "DGEA + date". +#' @param calculate.DGEA Logical determining if the DE analysis should be calculated. +#' Default: `TRUE`.. +#' @param plot.DGEA Logical determining if results should be plotted. +#' Default: `TRUE`.. +#' @param umap_caption Character string specifying the caption for the UMAP plot. Default: "". +#' @param plot.av.enrichment.hist Logical indicating whether to plot the average enrichment histogram. +#' Default: `TRUE`.. +#' @param plot.log.top.gene.stats Logical indicating whether to plot the log top gene statistics. +#' @param auto.cluster.naming Logical indicating automatic labeling of clusters. +#' Default: `TRUE`.. +#' @param clean.misc.slot Logical indicating whether to clean the misc slots of previous +#' clustering results. Default: `TRUE`.. +#' @param clean.meta.data Logical indicating whether to clean the metadata slots of +#' previous clustering results. Default: `TRUE`.. +#' @param n.cores Integer specifying the number of cores to use for parallel processing (multisession). +#' Default: 1. +#' @param presto Logical indicating whether to use presto for DE analysis. Default: `TRUE`.. +#' @param WorkingDir Character string specifying the working directory. Default: getwd(). +#' +#' @importFrom future plan +#' @return Modified Seurat object and markers list. +#' @examples +#' runDGEA(obj = mySeuratObject, param.list = myListParams, directory = "Results/MyAnalysis") +#' +#' @export + +runDGEA <- function(obj, + param.list = p, + ident = NULL, + res.analyzed.DE = if(is.null(ident)) c(.1) else ident, # param.list$'res.analyzed.DE' + reorder.clusters = if(is.null(ident)) TRUE else FALSE, + reorder.dimension = 1, + # ordering = if(any(!testNumericCompatible(res.analyzed.DE))) "no" else "ordered", # param.list$"cl.annotation" + # ordering = "ordered", # param.list$"cl.annotation" + directory, + dir_suffix, + subdirectory = ppp("DGEA_res", idate()), + add.combined.score = TRUE, + save.obj = TRUE, + calculate.DGEA = TRUE, + plot.DGEA = TRUE, + umap_caption = "", + plot.av.enrichment.hist = TRUE, + plot.log.top.gene.stats = TRUE, + auto.cluster.naming = TRUE, + clean.misc.slot = TRUE, + clean.meta.data = TRUE, + n.cores = 1, + presto = TRUE, + WorkingDir = getwd() + ) { + + if(presto) require(presto) + message("\nWorkingDir: ", WorkingDir) + + # Assertions for input parameters + stopifnot( + is(obj, "Seurat"), + is.list(param.list), + "res.analyzed.DE should be numeric, explicit strings should be provided in: ident" = + is.numeric(res.analyzed.DE) | !is.null(ident), + dir.exists(directory) + ) + + create_set_OutDir(directory, subdirectory, newName = "dir_DGEA") + dir_DGEA <- OutDir + + # Log utilized parameters from param.list + { + message("cl.annotation: ", if(reorder.clusters) paste("ordered:", reorder.dimension) else "no") + message("test: ", param.list$"test") + message("only.pos: ", param.list$"only.pos") + message("---------------------------------") + message("return.thresh: ", param.list$"return.thresh") + message("logfc.threshold: ", param.list$"logfc.threshold") + message("min.pct: ", param.list$"min.pct") + message("min.diff.pct: ", param.list$"min.diff.pct") + message("min.cells.group: ", param.list$"min.cells.group") + message("max.cells.per.ident: ", param.list$"max.cells.per.ident") + } + + # Record changes in @misc$p + obj@misc$p$"res.analyzed.DE" <- if(is.null(ident)) res.analyzed.DE else ident + obj@misc$p$"cl.annotation" <- if(is.null(ident)) { + if(reorder.clusters) reorder.dimension else "no" + } else "character" + + # Retrieve analyzed DE resolutions + message("Resolutions analyzed:") + df.markers.all <- Idents.for.DEG <- list.fromNames(x = res.analyzed.DE) + + # browser() + if (clean.misc.slot) { + message("Erasing up the misc slot: df.markers and top.markers.resX") + topMslots <- grepv("top.markers.res", names(obj@misc)) + obj@misc[topMslots] <- NULL + } + + if (clean.meta.data) { + message("Erasing up the meta.data clustering columns.") + topMslots <- grepv("top.markers.res", names(obj@meta.data)) + cl.ordered <- GetOrderedClusteringRuns(obj = obj) + obj@meta.data[, cl.ordered ] <- NULL + # cl.names <- GetNamedClusteringRuns(obj = obj, pat = "^cl.names.*[0-1]\\.[0-9]", + # find.alternatives = FALSE) + # obj@meta.data[, c(cl.ordered, cl.names)] <- NULL + } + + # Loop through each resolution setting to find markers ________________________________________ + if (reorder.clusters) { + message("Renumbering ----------------------------------------") + for (i in 1:length(res.analyzed.DE)) { + res <- res.analyzed.DE[i] + create_set_OutDir(p0(dir_DGEA, ppp("res", res))) + message(i) + + # Reorder clusters based on average expression of markers + message("Reordering clusters along dimension: ", sign(reorder.dimension), "*", if (abs(reorder.dimension) == 1) "x" else "y") + obj <- AutoNumber.by.UMAP(obj = obj, + ident = GetClusteringRuns(res = res, obj = obj)[1], + dim = abs(reorder.dimension), reduction = "umap", + swap = (reorder.dimension < 0), plot = TRUE + ) + } # end for loop + } # end if reorder.clusters + + # Set up clustering identity for DE analysis _______________________________________________ + for (i in 1:length(res.analyzed.DE)) { + Idents.for.DEG[[i]] <- + if (!is.null(ident)) { + ident + } else { + if (reorder.clusters) { + GetOrderedClusteringRuns(res = res.analyzed.DE[i], obj = obj)[1] + } else { + GetClusteringRuns(res = res.analyzed.DE[i], obj = obj)[1] + } + } # end if is.null(ident) + stopifnot(Idents.for.DEG[[i]] %in% names(obj@meta.data)) + } # end for loop + + + + # Loop through each resolution setting to find markers ________________________________________ + if (n.cores>1) future::plan("multisession", workers = n.cores) + + if (calculate.DGEA) { + message("Calclulating ----------------------------------------") + for (i in 1:length(res.analyzed.DE)) { + res <- res.analyzed.DE[i] + tag.res <- ppp("res", res) + df.slot <- if(!is.null(ident)) ident else tag.res + + message("Resolution: ", res, " -----------") + create_set_OutDir(p0(dir_DGEA, tag.res)) + + message("Ident.for.DEG: ", Idents.for.DEG[[i]]) + Idents(obj) <- Idents.for.DEG[[i]] + + # Perform differential expression analysis + tic("FindAllMarkers") + df.markers <- Seurat::FindAllMarkers(obj, + verbose = TRUE, + test.use = param.list$"test", + logfc.threshold = param.list$"logfc.threshold", + return.thresh = param.list$"return.thresh", + min.pct = param.list$"min.pct", + min.diff.pct = param.list$"min.diff.pct", + min.cells.group = param.list$"min.cells.group", + max.cells.per.ident = param.list$"max.cells.per.ident", + only.pos = param.list$"only.pos", + + ); toc() + + + Stringendo::stopif(is.null(df.markers)) + + # order df.markers by logFC + df.markers <- df.markers[order(df.markers$"avg_log2FC", decreasing = TRUE), ] + + if (add.combined.score) df.markers <- Add.DE.combined.score(df.markers) + + obj@misc$"df.markers"[[df.slot]] <- df.markers + + # Save results to disk + fname <- ppp("df.markers", res) + ReadWriter::write.simple.tsv(df.markers, filename = fname, v = F) + df.markers.all[[i]] <- df.markers + xsave(df.markers, suffix = df.slot, v = F) + + } # end for loop + + # Save final results to disk + create_set_OutDir(directory, subdirectory) + + # Assign df.markers.all to global environment + ReadWriter::write.simple.xlsx(named_list = df.markers.all, filename = "df.markers.all") + assign("df.markers.all", df.markers.all, envir = .GlobalEnv) + + + if (save.obj) { + create_set_OutDir(WorkingDir) + tag <- if(is.null(ident)) kpp("res", res.analyzed.DE) else ident + xsave(obj, suffix = kpp("w.DGEA", tag)) + } + } # end if calculate.DGEA + + # Loop through each resolution setting to find markers ________________________________________ + if (plot.DGEA) { + message('Plotting results -----------------') + + for (i in 1:length(res.analyzed.DE)) { + res <- res.analyzed.DE[i] + message('Resolution: ', res) + tag.res <- ppp("res", res) + df.slot <- if (!is.null(ident)) ident else tag.res + + create_set_OutDir(p0(dir_DGEA, df.slot)) + + df.markers <- obj@misc$"df.markers"[[df.slot]] + Stringendo::stopif(is.null(df.markers)) + + PlotTopGenesPerCluster( + obj = obj, + cl_res = res, + df_markers = df.markers, + nrGenes = param.list$"n.markers", + order.by = param.list$"DEG.ranking" + ) + + # Automatic cluster labeling by top gene ________________________________________ + if (auto.cluster.naming) { + message('Automatic cluster labeling by top gene.') + + obj <- StoreAllMarkers(df_markers = df.markers, res = res, obj = obj) + obj <- AutoLabelTop.logFC(group.by = Idents.for.DEG[[i]], obj = obj, plot.top.genes = FALSE) # already plotted above + + clUMAP(ident = ppp("cl.names.top.gene", Idents.for.DEG[[i]]), obj = obj, caption = umap_caption) + } # end if auto.cluster.naming + + # Plot per-cluster gene enrichment histogram ________________________________________ + if (plot.av.enrichment.hist) { + message('Plotting per-cluster gene enrichment histogram.') + # create_set_OutDir(directory, subdirectory) + + df.markers.tbl <- as_tibble(df.markers) + df.markers.tbl$'cluster' <- as.character(df.markers.tbl$'cluster') + p.deg.hist <- ggpubr::gghistogram(df.markers.tbl, x = "avg_log2FC", + title = "Number of enriched genes per cluster", + subtitle = "Binned by Log2(FC)", + caption = paste(res, "| vertical line at FC of 2."), + rug = TRUE, + color = "cluster", fill = "cluster", + facet.by = 'cluster', xlim = c(0,3), + ylab = "Nr. D.E. Genes") + + geom_vline(xintercept = 1) + + theme_linedraw() + + qqSave(ggobj = p.deg.hist, w = 10, h = 6, title = ppp("Enrichment log2FC per cluster",res)) + } + + # Plot per-cluster enriched gene counts ________________________________________ + if (plot.log.top.gene.stats) { + message('Plotting per-cluster enriched gene counts.') + + # Filter genes with avg_log2FC > 2 + lfc2_hiSig_genes <- df.markers |> + dplyr::filter(avg_log2FC > 1, p_val_adj < 0.05) |> + group_by(cluster) |> + arrange(cluster, desc(avg_log2FC)) + + top_genes <- lfc2_hiSig_genes |> + top_n(1, avg_log2FC) |> + pull(gene) + + # Get the number of genes per cluster + (NrOfHighlySignLFC2_genes <- lfc2_hiSig_genes |> + summarise(n = n()) |> + deframe() |> + sortbyitsnames()) + + qbarplot(NrOfHighlySignLFC2_genes, label = NrOfHighlySignLFC2_genes, + plotname = "Number of diff. genes per cluster", + sub = 'Genes with avg_log2FC > 1 and p_val_adj < 0.05', + xlab = "Clusters", ylab = "Number of diff. genes" + ) + + # Write out gene lists per cluster ________________________________________ + { + # Get the genes in a list per cluster + genes_list <- lfc2_hiSig_genes |> + group_by(cluster) |> + summarise(genes = list(gene)) |> + select(genes) |> + deframe() + + names(genes_list) <- unique(lfc2_hiSig_genes$"cluster") + genes_list <- sortbyitsnames(genes_list) + names(genes_list) <- ppp("cl", names(genes_list), top_genes,"DGs") + + # write out the gene list, each element to a txt file. + create_set_OutDir(p0(dir_DGEA, ppp("res", res), "/top_genes")) + for (i in 1:l(genes_list)) { + write.simple.vec(input_vec = genes_list[[i]], filename = names(genes_list)[i], v = F ) + } # for cluster + } + } # end if plot.log.top.gene.stats + } # end for loop of resolutions + } # end if plot.DGEA + # create_set_OutDir(directory, subdirectory) + + # Return obj and df.markers.all to global environment + return(obj) + create_set_Original_OutDir() +} # end runDGEA + + + +# _________________________________________________________________________________________________ +# General ______________________________ ---- +# _________________________________________________________________________________________________ + + +# _________________________________________________________________________________________________ +#' @title Update Seurat Object Properly, including Assays and DimReducs +#' +#' @description This function is an extension on `SeuratObject::UpdateSeuratObject()`. It +#' first calls `UpdateSeuratObject()`, to updates the class definitions of of a (v3) Seurat object, +#' then it updates its assays to the 'Assay5' class, and updates the UMAP DimReduc to keys. +#' +#' @param obj A Seurat object to be updated. Default: None. +#' @param update.gene.symbols Logical. If TRUE, gene symbols are updated to the latest version. +#' +#' @return An updated Seurat object. +#' +#' @importFrom SeuratObject UpdateSeuratObject +#' @examples +#' \dontrun{ +#' combined.obj <- UpdateSeuratObjectProperly(combined.obj) +#' } +#' +#' @export +UpdateSeuratObjectProperly <- function(obj, update.gene.symbols = TRUE) { + # Input assertions + stopifnot(is(obj, "Seurat")) + + warning("This function is not yet fully tested. Use with caution.", immediate. = TRUE) + message("Input obj. version: ", obj@version) + + # Update Object Structure (not Assays, etc.) _________ + if (obj@version < "5") { + obj <- SeuratObject::UpdateSeuratObject(obj) + } else { + message("Object already updated to version 5. Skipping 'UpdateSeuratObject()'.") + } + + # Update assays individually __________________ + existing_assays <- names(obj@assays) + message("Updating assays to 'Assay5' class. Found: \n", existing_assays) + for (assay in existing_assays) { + obj[[assay]] <- as(obj[[assay]], Class = "Assay5") + } + + # Update UMAP DimReduc manually __________________ + umap.exists <- !is.null(obj@reductions$umap) + if (umap.exists) { + message("Updating UMAP DimReduc to keys.") + colnames(obj@reductions$umap@cell.embeddings) <- tolower(colnames(obj@reductions$umap@cell.embeddings)) + obj@reductions$umap@key <- tolower(obj@reductions$umap@key) + } else { + message("No UMAP DimReduc found. Skipping.") + } + + if (update.gene.symbols) { + message("Updating gene symbols to the latest version.") + obj <- Seurat.utils::UpdateGenesSeurat(obj) + } + + message("Output obj. version: ", obj@version) + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title parallel.computing.by.future +#' +#' @description Run gc(), load multi-session computing and extend memory limits. +#' @param cores Number of cores +#' @param maxMemSize memory limit +#' +#' @export +parallel.computing.by.future <- function(cores = 4, maxMemSize = 4000 * 1024^2) { + # https://satijalab.org/seurat/v3.0/future_vignette.html + cat( + "1. If you load futures before you finished using foreach loops, + NormalizeData inside a foreach loop fails (Error writing to connection) + -> I assume 'future' and 'doMC' are not compatible + + 2. If you setup computing on e.g. six cores, it runs 6 instances of R with the entire memory space copied. + If you run out of memory, the system starts using the SSD as memory, and it slows you down extremely extremely extremely. + -> Therefore it is important to clean up the memory space before setting up multicore computation. + + Loaded: library(future), workers set to 6 (def),set Max mem size to 2GB (def)." + ) + + gc(full = TRUE) + try(memory.biggest.objects(), silent = TRUE) + user_input <- readline(prompt = "Are you sure that memory should not be cleaned before paralellizng? (y/n)") + + if (user_input == "y") { + iprint("N. cores", cores) + library(future) + # plan("multiprocess", workers = cores) + plan("multisession", workers = cores) + # So to set Max mem size to 2GB, you would run : + options(future.globals.maxSize = maxMemSize) + } else { + print("No parallelization") + } +} + + +# _________________________________________________________________________________________________ +#' @title Intersect Genes with Seurat Object +#' +#' @description Intersects a set of gene names with those found in a Seurat object. +#' @param genes A vector of gene names to be intersected with the Seurat object. +#' @param obj A Seurat object containing gene expression data. +#' @param n_genes_shown Number of missing genes to be printed. Default: 10. +#' @param strict All genes to be present in the Seurat object? Default: `TRUE`.. +#' @param verbose verbose +#' @return A vector of gene names that are found both in the input 'genes' vector and the +#' Seurat object. +#' +#' @export +IntersectGeneLsWithObject <- function(genes, obj = combined.obj, n_genes_shown = 10, + species_ = "human", EnforceUnique = TRUE, ShowStats = TRUE, + strict = TRUE, verbose = TRUE) { + message(" > Running IntersectGeneLsWithObject()...") + # "formerly IntersectWithExpressed(), which still exist in gruffi." + + stopifnot( + is.character(genes), + is(obj, "Seurat"), + is.numeric(n_genes_shown) && n_genes_shown > 0, + is.logical(strict) + ) + stopifnot(length(genes) > 0, length(rownames(obj)) > 0) + + # Strict mode: Ensure all genes are present in the Seurat object + all.genes.found <- all(genes %in% rownames(obj)) + if (!all.genes.found) { + symbols.missing <- setdiff(genes, rownames(obj)) + iprint(length(symbols.missing), "symbols.missing:", symbols.missing) + message(" > Running HGNChelper::checkGeneSymbols() to update symbols") + + HGNC.updated <- HGNChelper::checkGeneSymbols(genes, unmapped.as.na = FALSE, map = NULL, species = species_) + if (ShowStats) { + HGNC.updated + print(GetUpdateStats(HGNC.updated)) + } + + if (EnforceUnique) HGNC.updated <- HGNC.EnforceUnique(HGNC.updated) + genes <- HGNC.updated$Suggested.Symbol + + # UpdateSymbolList(symbols.missing) # Does not catch CTIP2 !!! + if (strict) stopifnot(all(genes %in% rownames(obj))) + } + + # Finding genes that are missing in the Seurat object + missing_in_obj <- setdiff(genes, rownames(obj)) + if (verbose) { + Stringendo::iprint( + length(missing_in_obj), " (of ", length(genes), + ") genes are MISSING from the Seurat object with (", length(rownames(obj)), + ") genes. E.g.:", head(missing_in_obj, n_genes_shown) + ) + } + + # Finding genes that are found in both the input list and the Seurat object + g_found <- intersect(genes, rownames(obj)) + + # Output argument assertion + stopifnot(length(g_found) > 0) + + return(g_found) +} + +# _________________________________________________________________________________________________ + +#' @title Intersect Genes with the List of Noticeably Expressed Genes +#' +#' @description Intersects a vector of gene names with a Seurat object to find genes that are both +#' in the input list and have expression levels in the top quantiles as defined by the object's +#' q99 expression data. It aims to filter genes based on their expression levels being above a +#' specified threshold. Additionally, it offers an option to sort the genes by their expression +#' levels in decreasing order. +#' +#' @param genes A vector of gene names to be intersected with the Seurat object. +#' @param obj A Seurat object containing gene expression data. Default: `combined.obj`. +#' @param above The expression level threshold above which genes are considered noticeably +#' expressed. Default: 0. +#' @param sort A logical flag indicating whether to sort the filtered genes by their expression +#' levels in decreasing order. Default: `FALSE`.. +#' @return A vector of gene names that are found both in the input 'genes' vector and the Seurat +#' object, and have expression levels above the specified 'above' threshold. If `sort` is TRUE, +#' these genes are returned in decreasing order of their expression levels. +#' +#' @examples +#' # Assuming `genes` is a vector of gene names and ` +#' +#' @export +SelectHighlyExpressedGenesq99 <- function(genes, obj = combined.obj, + above = 0, sort = FALSE, strict = FALSE) { + message(" > Running SelectHighlyExpressedGenesq99()...") + stopifnot(is.character(genes), is(obj, "Seurat"), is.numeric(above)) + + genes.expr <- IntersectGeneLsWithObject(genes = genes, obj = obj, verbose = FALSE, strict = strict) + if (length(genes.expr) < length(genes)) message("Some genes not expressed. Recommend to IntersectGeneLsWithObject() first.") + + q99.expression <- obj@misc$expr.q99 + print(pc_TRUE(q99.expression == 0, suffix = "of genes at q99.expression are zero")) + genes.expr.high <- q99.expression[genes.expr] + if (sort) genes.expr.high <- sort.decreasing(genes.expr.high) + print(genes.expr.high) + genes.filt <- names(genes.expr.high)[genes.expr.high > above] + + SFX <- kppws("of the genes are above min. q99 expression of:", above) + print(pc_TRUE(genes.expr %in% genes.filt, suffix = SFX)) + + return(genes.filt) +} + + + + +# _________________________________________________________________________________________________ +#' @title SmallestNonAboveX +#' +#' @description replace small values with the next smallest value found, which is >X. +#' @param vec Numeric input vector +#' @param X Threshold, Default: 0 +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' SmallestNonZero(vec = df.markers$"p_val") +#' } +#' } +#' @export +SmallestNonAboveX <- function(vec, X = 0) { + newmin <- min(vec[vec > X]) + vec[vec <= X] <- newmin + vec +} + + +# _________________________________________________________________________________________________ +#' @title AreTheseCellNamesTheSame +#' +#' @description Assert and compare two character vectors (e.g.: cell IDs) how much they overlap and +#' plot a Venn Diagram. The function aborts with an error if overlap is too small. +#' @param vec1 Character vector, eg. with cell names +#' @param vec2 Character vector, eg. with cell names +#' @param names Names for plotting +#' @param min.overlap Threshold below there is no there is no meaningful overlap between the two vectors. +#' +#' @export +#' @examples # reTheseCellNamesTheSame() +AreTheseCellNamesTheSame <- function( + vec1 = names(UVI.annot), + vec2 = names(nr_UVI), + names = c("Cells in Targ.Ampl", "Cells in GEX"), + min.overlap = 0.33) { + Cellname.Overlap <- list(vec1, vec2) + names(Cellname.Overlap) <- if (!isFALSE(names)) names else c(substitute(vec1), substitute(vec2)) + + cells.in.both <- intersect(vec1, vec2) + sbb <- percentage_formatter(length(cells.in.both) / length(vec2), suffix = "of cells (GEX) in have a UVI assigned") + ggExpress::qvenn(Cellname.Overlap, subt = sbb) + iprint("Venn Diagramm saved.") + iprint(sbb) + + Nr.overlapping <- length(intersect(vec1, vec2)) + Nr.total <- length(union(vec1, vec2)) + Percent_Overlapping <- Nr.overlapping / Nr.total + print("") + report <- percentage_formatter(Percent_Overlapping, + prefix = "In total,", + suffix = paste("of the cellIDs overlap across", names(Cellname.Overlap)[1], "and", names(Cellname.Overlap)[2]) + ) + print(report[1]) + stopifnot(Percent_Overlapping > min.overlap) +} + + +# _________________________________________________________________________________________________ +#' @title Add to Misc or Tools Slot +#' +#' @description This function creates and adds a sub-slot to either the 'misc' or 'tools' slot of a +#' Seurat object. If the sub-slot already exists, it can either be overwritten or a warning will be issued. +#' +#' @param obj A Seurat object. +#' @param pocket_name Which main pocket to use: 'misc' or 'tools'. Default: 'misc'. +#' @param slot_value The value to be assigned to the sub-slot. +#' @param slot_name The name of the sub-slot. Automatically derived from 'sub_slot_value' if not provided. +#' @param sub_slot_value The value to be assigned to the sub-slot. +#' @param sub_slot_name The name of the sub-slot. Automatically derived from 'sub_slot_value' if not provided. +#' @param overwrite A boolean indicating whether to overwrite an existing sub-slot with the same name. +#' +#' @return The modified Seurat object with the new or updated sub-slot. +#' +#' @export +addToMiscOrToolsSlot <- function(obj, pocket_name = "misc", + slot_value = NULL, + slot_name = deparse(substitute(slot_value)), + sub_slot_value = NULL, + sub_slot_name = deparse(substitute(sub_slot_value)), + overwrite = FALSE) { + message("Running addToMiscOrToolsSlot()...") + + stopifnot(is(obj, "Seurat"), + pocket_name %in% c("misc", "tools"), + is.character(slot_name), length(slot_name) == 1, + is.character(sub_slot_name), length(sub_slot_name) == 1, + "slot name or value is provided" = is.null(slot_value) || !is.null(slot_name), + "sub_slot name or value is provided" = is.null(sub_slot_value) || !is.null(sub_slot_name) + ) + + # Accessing the specified slot + pocket <- slot(object = obj, name = pocket_name) + + # Creating new sub_slot or reporting if it exists + if (slot_name %in% names(pocket) && !overwrite) { + warning(paste(slot_name, "in", pocket_name, "already exists. Not overwritten."), immediate. = TRUE) + } else { + pocket[[slot_name]] <- slot_value + } + + # Creating new sub_sub_slot or reporting if it exists + if (sub_slot_name %in% names(pocket[[slot_name]]) && !overwrite) { + warning(paste(sub_slot_name, "in", pocket_name, "@", slot_name, "already exists. Not overwritten."), immediate. = TRUE) + } else { + pocket[[slot_name]][[sub_slot_name]] <- sub_slot_value + } + + + # Assigning the modified slot back to the object + slot(object = obj, name = pocket_name) <- pocket + + return(obj) +} + +# _________________________________________________________________________________________________ +#' @title Display Slots in the @tools of an Seurat Object +#' +#' @description `showToolsSlots` prints the names of slots in the `@tools` of a given object. +#' It specifically targets list elements, skipping over data frames and other non-list objects. +#' +#' @param obj An object whose `@tools` slot needs to be examined. +#' @param max.level The maximum level of nesting to print. +#' @param subslot The name of a sub-slot within the `@tools` slot to examine. +#' @param ... Additional arguments to be passed to `str`. +#' +#' @details +#' The function iterates over the slots in the `@tools` of `obj`. If a slot is a list +#' (and not a data frame), it prints the names of elements within this list. If the slot +#' is not a list or is a data frame, it skips printing the names. The function currently +#' does not use the `indent` parameter but it could be incorporated in future enhancements +#' to control the formatting of the output. +#' +#' @examples showToolsSlots(obj) +#' +#' @export +showToolsSlots <- function(obj, max.level = 1, subslot = NULL, ...) { + slotX <- if (is.null(subslot)) obj@tools else obj@tools[[subslot]] + str(slotX, max.level = max.level, ...) + + # tools_slot <- names(obj@tools) + # # i=4 + # for (i in seq(tools_slot)) { + # cat("", fill = TRUE) + # message("obj@tools$", tools_slot[i]) + # + # x <- obj@tools[[tools_slot[i]]] + # if (!is.data.frame(x) & is.list(x)) { + # print(paste(" ", names(x)), width = 12) + # } else { + # return(idim(x)) + # } + # } +} + + +# _________________________________________________________________________________________________ +#' @title Display Slots in the @misc of an Seurat Object +#' +#' @description See `showToolsSlots` for details. Prints the names of slots in the `@misc` of a given object. +#' It specifically targets list elements, skipping over data frames and other non-list objects. +#' +#' @param obj An object whose `@misc` slot needs to be examined. Default: `combined.obj` +#' @param max.level Max depth to dive into sub-elements. +#' @param subslot A subslot within `@misc`. +#' @param ... ... +#' +#' @examples showToolsSlots(obj) +#' +#' @export +showMiscSlots <- function(obj = combined.obj, max.level = 1, subslot = NULL, + ...) { + slotX <- if (is.null(subslot)) obj@misc else obj@misc[[subslot]] + str(slotX, max.level = max.level, ...) + + # Path to slot + msg <- paste0(substitute(obj), "@misc") + if (!is.null(subslot)) msg <- paste0(msg, "$", substitute(subslot)) + message(msg) +} + + + + +# _________________________________________________________________________________________________ +#' @title calc.q99.Expression.and.set.all.genes + +#' @description Calculate the gene expression of the e.g.: 99th quantile (expression in the top 1% cells). +#' @param obj Seurat object, Default: `combined.obj` +#' @param quantileX Quantile level, Default: 0.9 +#' @param max.cells Max number of cells to do the calculation on. Downsample if excdeeded. Default: 1e+05 +#' @param slot slot in the Seurat object. Default: 'data' +#' @param assay RNA or integrated assay, Default: c("RNA", "integrated")[1] +#' @param set.misc Create the "all.genes" variable in @misc? Default: `TRUE`. +#' @param assign_to_global_env Create the "all.genes" variable in the global env?, Default: `TRUE`. +#' @param plot Plot the expression distribution? Default: `TRUE`. +#' @param show Show the distribution plot? Default: `TRUE`. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' combined.obj <- calc.q99.Expression.and.set.all.genes( +#' obj = combined.obj, quantileX = 0.9, +#' max.cells = 25000 +#' ) +#' head(sort(as.numeric.wNames(obj@misc$expr.q90), decreasing = TRUE)) +#' combined.obj <- calc.q99.Expression.and.set.all.genes( +#' obj = combined.obj, quantileX = 0.95, +#' max.cells = 25000, set.all.genes = FALSE +#' ) +#' } +#' } +#' @seealso +#' \code{\link[sparseMatrixStats]{character(0)}} +#' @importFrom tictoc tic toc +#' @importFrom sparseMatrixStats rowQuantiles +#' +#' @export + +calc.q99.Expression.and.set.all.genes <- function( + obj = combined.obj, + quantileX = 0.99, max.cells = 1e5, + slot = "data", + assay = c("RNA", "integrated", "SCT")[1], + set.misc = TRUE, + assign_to_global_env = TRUE, + suffix = as.character(substitute(obj)), + plot = TRUE, + show = TRUE + ) { + message("slot: ", slot, " assay: ", assay, ".\n") + + tictoc::tic("calc.q99.Expression.and.set.all.genes") + stopifnot( + is(obj, "Seurat"), + quantileX > 0 & quantileX < 1, + max.cells > 1e3, max.cells < 1e6, + is.logical(set.misc), is.logical(assign_to_global_env), is.logical(plot), is.logical(show), + is.character(suffix) + ) + + warnifnot( + slot %in% c("data", "scale.data", "counts"), + assay %in% c("RNA", "integrated") + ) + + # Get the data matrix ____________________________________________________________ + assay_data <- obj@assays[[assay]] + if (obj@version >= "5") { + if(assay == "RNA") { + layers <- assay_data@layers + message(length(layers), " layers in RNA assay") + stopifnot(slot %in% names(layers)) + data_mtx <- layers[[slot]] + } else { + data_mtx <- slot(assay_data, slot) + } + } else { + data_mtx <- assay_data[[slot]] + } + + # Downsample if the number of cells is too high _________________________________________________ + if (ncol(data_mtx) > max.cells) { + dsampled <- sample(x = 1:ncol(data_mtx), size = max.cells) + data_mtx <- data_mtx[, dsampled] + message("Downsampled from ", ncol(obj), "to ", max.cells, " cells") + } + + # Calculate the number of cells in the top quantile (e.g.: 99th quantile) that is + # required to for gene expression to be >0 + n.cells <- floor(ncol(data_mtx) * (1 - quantileX)) + message("Each gene has to be expressed in min. ", n.cells, " cells, to have >0 quantile-expression\n", + "quantileX: ", quantileX, " max.cells: ", max.cells) + + # Prepare for plotting ____________________________________________________________ + qname <- paste0("q", quantileX * 100) + slot_name <- kpp("expr", qname) + + print("Calculating Gene Quantiles") + expr.q99.df <- sparseMatrixStats::rowQuantiles(data_mtx, probs = quantileX) + expr.q99 <- iround(expr.q99.df) + + log2.gene.expr.of.the.90th.quantile <- as.numeric(log2(expr.q99 + 1)) # strip names + qnameP <- paste0(100 * quantileX, "th quantile") + + # Plot the distribution of gene expression in the 99th quantile _________________________________ + if(plot){ + pobj <- ggExpress::qhistogram(log2.gene.expr.of.the.90th.quantile, + plotname = paste("Gene expression in the", qnameP, " in", suffix), + ext = "pdf", breaks = 30, + subtitle = kollapse(pc_TRUE(expr.q99 > 0, NumberAndPC = TRUE), " genes have ", qname, " expr. > 0 (in ", n.cells, " cells)."), + caption = paste(n.cells, "cells in", qnameP, "from", ncol(data_mtx), "cells in (downsampled) object."), + suffix = suffix, + xlab = paste0("log2(expr. in the ", qnameP, "quantile+1) [UMI]"), + ylab = "Nr. of genes", + plot = T, save = TRUE, + vline = .15, + filtercol = TRUE, + palette_use = "npg" + ) + tictoc::toc() + if(show) print(pobj) + } + + + all.genes <- percent_rank(expr.q99) + names(all.genes) <- names(expr.q99) + all.genes <- as.list(sort(all.genes, decreasing = TRUE)) + + if (assign_to_global_env) assign("all.genes", all.genes, envir = as.environment(1)) + + # if (set.all.genes) obj@misc$'all.genes' = all.genes + if (set.misc) obj@misc[[slot_name]] <- expr.q99 + + iprint( + "Quantile", quantileX, "is now stored under obj@misc$all.genes and $", slot_name, + " Please execute all.genes <- obj@misc$all.genes." + ) + return(obj) +} + + + +# _________________________________________________________________________________________________ +#' @title Filter Coding Gene Symbols (or any matching input Patterns) +#' +#' @description This function filters out gene names that match specified patterns. It reports +#' the original and final number of gene symbols and the percentage remaining after filtering. +#' It filters out non-coding gene symbols by default. +#' +#' @param genes A character vector of gene symbols. +#' @param pattern_NC A character vector of patterns to filter out non-coding gene symbols. +#' Default: c("^AC.", "^AL.", "^c[1-9]orf", "\\.AS[1-9]$"). +#' @param v "verbose" Whether to print the number of genes before and after filtering. +#' @param unique Whether to return unique gene symbols. Default: `TRUE`.. +#' @param ... Additional arguments to pass to \code{\link[stringr]{str_detect}}. +#' +#' @return A character vector of filtered gene symbols. +#' +#' @examples +#' genes <- c("AC123", "AL456", "c1orf7", "TP53", "BRCA1", "X1.AS1", "MYC") +#' genes_kept <- filterNcGenes(genes) +#' print(genes_kept) +#' +#' @importFrom stringr str_detect +#' @export +#' +filterNcGenes <- function(genes, pattern_NC = c("^A[CFLP][0-9]{6}", "^Z[0-9]{5}", + "^LINC0[0-9]{4}", "^C[1-9]+orf[1-9]+", + "[-|\\.]AS[1-9]*$", "[-|\\.]DT[1-9]*$", + "^MIR[1-9]", "^SNHG[1-9]"), + v = TRUE, unique = TRUE, ...) { + + # Input assertions + stopifnot(is.character(genes), length(genes) > 0, + is.character(pattern_NC), length(pattern_NC) > 0 + ) + + # Filter the genes + combined_pattern <- paste(pattern_NC, collapse = "|") + genes_discarded <- genes[stringr::str_detect(genes, combined_pattern)] + iprint("Example discarded", CodeAndRoll2::trail(genes_discarded)) + + genes_kept <- genes[stringr::str_detect(genes, combined_pattern, negate = TRUE)] + + # Report original and final list sizes and percentage remaining + if(v) { + original_length <- length(genes) + filtered_length <- length(genes_kept) + percentage_remaining <- (filtered_length / original_length) * 100 + + message("Original number of gene symbols: ", original_length) + message("Filtered number of gene symbols: ", filtered_length) + message("Percentage remaining: ", round(percentage_remaining, 2), "%") + } + + # Output assertions + stopifnot(is.character(genes_kept), length(genes_kept) <= original_length) + + if(unique) genes_kept <- unique(genes_kept) + + return(genes_kept) +} + + + + + +# _________________________________________________________________________________________________ +# Clustering ______________________________ ---- +# _________________________________________________________________________________________________ + + + +# _________________________________________________________________________________________________ +#' @title RenameClustering +#' +#' @description Rename clustering in a Seurat object. +#' @param namedVector named vector, where values = new, names(vec) = old +#' @param orig.ident meta.data colname original +#' @param suffix.new.ident How to name (suffix) the new identity. Default: "ManualNames" +#' @param new.ident meta.data colname new +#' @param suffix.plot Suffix description (short string) to be added to the umap plots. +#' @param ... Pass any other parameter to the internally called functions (most of them should work). +#' @param obj Seurat object +#' +#' @export + +RenameClustering <- function( + namedVector = ManualNames, + orig.ident = "RNA_snn_res.0.3", + suffix.new.ident = "ManualNames", + new.ident = ppp(orig.ident, suffix.new.ident), + obj = combined.obj, + suffix.plot = "", + plot_umaps = TRUE, + ...) { + + NewX <- CodeAndRoll2::translate( + vec = as.character(obj@meta.data[, orig.ident]), + old = names(namedVector), + new = namedVector + ) + + obj <- AddMetaData(object = obj, metadata = NewX, col.name = new.ident) + + iprint("new.ident is", new.ident, "created from", orig.ident) + print("") + + if (plot_umaps) { + stopifnot(is.character(suffix.plot)) + suffix.plot <- if (nchar(suffix.plot)) make.names(suffix.plot) + print(clUMAP(orig.ident, suffix = suffix.plot, sub = suffix.plot, obj = obj, ...)) + print(clUMAP(new.ident, suffix = suffix.plot, sub = suffix.plot, obj = obj, ...)) + clUMAP(new.ident, suffix = suffix.plot, sub = suffix.plot, label = F, obj = obj, ...) + } else { + iprint("New ident:", new.ident) + } + + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title Shorten Clustering Names +#' +#' @description This function takes in a string representing a clustering name, +#' and shortens it according to specific rules. It replaces "snn_res." with "", +#' "best.matching.names" with "bmatch", "ordered" with "ord", +#' "ManualNames" with "mNames", and ".long" at the end of the string with ".L". +#' +#' @param str A character string representing the clustering name to be shortened. +#' +#' @return A character string representing the shortened clustering name. +#' +#' @examples +#' \dontrun{ +#' shorten_clustering_names("RNA_snn_res.0.5.ordered.ManualNames") # Returns 'RNA.0.5.ord.mNames' +#' shorten_clustering_names("RNA_snn_res.0.3.best.matching.names.ManualNames.long") # Returns 'RNA.0.3.bmatch.mNames.L' +#' shorten_clustering_names("RNA_snn_res.1.7.ordered.ManualNames.Simplest") # Returns 'RNA.1.7.ord.mNames.Simplest' +#' shorten_clustering_names("RNA_snn_res.0.5.ordered.ManualNames.Simpler") # Returns 'RNA.0.5.ord.mNames.Simpler' +#' } +#' +#' @export +shorten_clustering_names <- function(str) { + # Replace 'snn_res' with nothing + str <- gsub("snn_res.", "", str) + # Replace 'best.matching.names' with 'bmatch' + str <- gsub("best.matching.names", "bmatch", str) + # Replace 'ordered' with 'ord' + str <- gsub("ordered", "ord", str) + # Replace 'ManualNames' with 'mNames' + str <- gsub("ManualNames", "mNames", str) + # Replace 'long' with 'L' + str <- gsub(".long$", ".L", str) + return(str) +} + + + +# _________________________________________________________________________________________________ +#' @title Retrieve Cluster Names +#' +#' @description Extracts cluster names based on a specified identity class from a Seurat object. +#' +#' @param obj A Seurat object. Default: `combined.obj`. +#' @param ident The identity class from which to retrieve cluster names. +#' Default uses the second clustering run from `GetClusteringRuns(obj)`. +#' @examples +#' \dontrun{ +#' getClusterNames(obj = combined.obj, ident = GetClusteringRuns(obj)[2]) +#' } +#' @return Prints and returns the sorted unique cluster names as a character vector. +#' @export +getClusterNames <- function(obj = combined.obj, ident = GetClusteringRuns(obj)[2]) { + iprint("ident used:", ident) + clz <- as.character(sort(deframe(unique(obj[[ident]])))) + cat(dput(clz)) +} + + + +# _________________________________________________________________________________________________ +#' @title GetClusteringRuns +#' +#' @description The `GetClusteringRuns` function retrieves metadata column names associated with +#' clustering runs, based on a pattern to match, `"*snn_res.[0-9].[0-9]$"`, by default. +#' @param obj Seurat object, Default: `combined.obj` +#' @param res Clustering resoluton to use, Default: `FALSE`. +#' @param pat Pattern to match, Default: `*snn_res.*[0-9]$` +#' @param v verbose, Default: `TRUE`. +#' +#' @return Prints and returns the sorted unique cluster names as a character vector. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' GetClusteringRuns(obj = combined.obj, pat = "*snn_res.*[0-9]$") +#' } +#' } +#' @export +GetClusteringRuns <- function(obj = combined.obj, + res = FALSE, pat = "*snn_res.[0-9].[0-9]+$", + v = TRUE) { + + # Get clustering results + clustering.results <- sort(CodeAndRoll2::grepv(x = colnames(obj@meta.data), pattern = pat)) + + # Check if no clustering results were found + if (identical(clustering.results, character(0))) if(v) warning("No matching (simple) clustering column found!", immediate. = TRUE) + + if (!isFALSE(res)) { + # Extract numeric values from clustering.results + clustering.res.found.numeric <- as.numeric(sub(".+_snn_res.", "", clustering.results)) + + # Filter clustering.results based on the numeric vector res + clustering.results <- clustering.results[clustering.res.found.numeric %in% res] + if(length(clustering.results) == 0) warning("No clustering matches `res`!", immediate. = TRUE) + + } + + if(v) { + message("Clustering runs found:") + dput(clustering.results) + } + + return(clustering.results) +} + +# _________________________________________________________________________________________________ +#' @title GetNamedClusteringRuns +#' +#' @description The `GetNamedClusteringRuns` function retrieves metadata column names associated with +#' non-numeric ("named") clustering runs, based on a pattern to match, `"Name|name"`, by default. +#' @param obj Seurat object, Default: `combined.obj` +#' @param res Clustering resoluton to use, Default: c(FALSE, 0.5)[1] +#' @param topgene Match clustering named after top expressed gene (see vertesy/Seurat.pipeline/~Diff gene expr.), Default: `FALSE`. +#' @param pat Pattern to match, Default: '^cl.names.Known.*[0,1]\.[0-9]$' +#' @param find.alternatives If TRUE, tries to find alternative clustering runs with +#' the same resolution, Default: `TRUE`. +#' @param v Verbose output, Default: `TRUE`. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' GetNamedClusteringRuns() +#' } +#' } +#' @export +GetNamedClusteringRuns <- function( + obj = combined.obj, + res = list(FALSE, 0.5)[[1]], topgene = FALSE, + pat = c("^cl.names.top.gene.+[0-9]\\.[0-9]", "Name|name")[2], + find.alternatives = TRUE, + v = TRUE) { + + if (res) pat <- gsub(x = pat, pattern = "\\[.*\\]", replacement = res) + if (topgene) pat <- gsub(x = pat, pattern = "Known", replacement = "top") + clustering.results <- CodeAndRoll2::grepv(x = colnames(obj@meta.data), pattern = pat) + + if (identical(clustering.results, character(0))) { + if(v) warning("No matching (named) clustering column found! Trying GetClusteringRuns(..., pat = '*_res.*[0,1]\\.[0-9]$)", immediate. = TRUE ) + if (find.alternatives) clustering.results <- + GetClusteringRuns(obj = obj, res = FALSE, pat = "*_res.*[0,1]\\.[0-9]$", v = F) + } + + if(v) dput(clustering.results) + + return(clustering.results) +} + + + +# _________________________________________________________________________________________________ +#' @title GetOrderedClusteringRuns +#' +#' @description Get Clustering Runs: metadata column names. +#' @param obj Seurat object, Default: `combined.obj`. +#' @param res Clustering resoluton to use, Default: `FALSE`. +#' @param pat Pattern to match, Default: '*snn_res.*[0,1]\.[0-9]\.ordered$' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' GetOrderedClusteringRuns() +#' GetOrderedClusteringRuns(res = 0.5) +#' } +#' } +#' @export +GetOrderedClusteringRuns <- function(obj = combined.obj, res = FALSE, + pat = "*snn_res.*[0,1]\\.[0-9]\\.ordered$") { + if (res) pat <- gsub(x = pat, pattern = "\\[.*\\]", replacement = res) + clustering.results <- CodeAndRoll2::grepv(x = colnames(obj@meta.data), pattern = pat) + if (identical(clustering.results, character(0))) warning("No matching (ordered) clustering column found!", immediate. = TRUE) + return(clustering.results) +} + + + +# _________________________________________________________________________________________________ +#' @title GetNumberOfClusters +#' +#' @description Get Number Of Clusters # +#' @param obj Seurat object, Default: `combined.obj` +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' GetNumberOfClusters() +#' } +#' } +#' @export +GetNumberOfClusters <- function(obj = combined.obj) { # Get Number Of Clusters + clustering.results <- GetClusteringRuns(obj) + print("## Number of clusters: ---------") + for (cc in clustering.results) { + NrCl <- length(unique(obj@meta.data[[cc]])) + iprint(cc, " ", NrCl) + } +} + + +# _________________________________________________________________________________________________ +#' @title calc.cluster.averages +#' +#' @description Calculates the average of a metadata column (numeric) per cluster. +#' @param col_name The name of the column for which the average is calculated. Default: 'Score.GO.0006096'. +#' @param plot.UMAP.too Whether to plot a UMAP as well. Default: `TRUE`.. +#' @param return.plot Whether to return the plot. Default: `FALSE`.. +#' @param obj The main Seurat object used for calculations. Default: `combined.obj`. +#' @param split_by Cluster to split by. Default: First entry of GetNamedClusteringRuns(). +#' @param scale.zscore Whether to scale z-scores. Default: `FALSE`.. +#' @param simplify Whether to simplify the result. Default: `TRUE`.. +#' @param plotit Whether to plot the results. Default: `TRUE`.. +#' @param histogram Whether to produce a histogram. Default: `FALSE`.. +#' @param nbins The number of bins for the histogram. Default: 50. +#' @param suffix Suffix added to the filename. Default: NULL. +#' @param stat Statistical method applied, "mean" or "median". Default: "median". +#' @param quantile.thr The threshold for quantiles. Default: 0.9. +#' @param absolute.thr Absolute threshold used in computations. Default: `FALSE`.. +#' @param filter The filter mode: 'above', 'below', or FALSE. Default: `FALSE`.. +#' @param ylab.text Text for the y-axis label. Default: "Cluster" followed by the statistical method and "score". +#' @param title Title for the plot. Default: "Cluster" followed by the statistical method and column name. +#' @param subtitle The subtitle for the plot. Default: NULL. +#' @param width The width of the plot. Default: 8. +#' @param height The height of the plot. Default: 6. +#' @param ... Additional parameters passed to the internally called functions. +#' @param xlb The label for the x-axis. Default depends on the 'absolute.thr' parameter. +#' @param fname The filename for the plot. Default: based on column name and split_by value. +#' @export +#' @importFrom Stringendo percentage_formatter + +calc.cluster.averages <- function( + col_name = "Score.GO.0006096", + plot.UMAP.too = TRUE, + return.plot = FALSE, + obj = combined.obj, + split_by = GetNamedClusteringRuns()[1], + scale.zscore = FALSE, + simplify = TRUE, plotit = TRUE, + histogram = FALSE, nbins = 50, + suffix = NULL, + stat = c("mean", "median")[2], + quantile.thr = 0.9, + absolute.thr = FALSE, + filter = c(FALSE, "above", "below")[1], + ylab.text = paste("Cluster", stat, "score"), + title = paste("Cluster", stat, col_name), + prefix.cl.names = FALSE, + report = TRUE, + subtitle = NULL, + width = 8, height = 6, + ... + # , ylb = paste(ylab.text, col_name) + # , xlb = paste("Clusters >",Stringendo::percentage_formatter(quantile.thr),"quantile are highlighted. |", split_by) + , xlb = if (absolute.thr) { + paste("Threshold at", absolute.thr) + } else { + paste( + "Black lines: ", kppd(Stringendo::percentage_formatter(c(1 - quantile.thr, quantile.thr))), "quantiles |", + "Cl. >", Stringendo::percentage_formatter(quantile.thr), "are highlighted. |", split_by + ) + }, + fname = ppp(col_name, split_by, "cluster.average.barplot.pdf", ...)) { # calc.cluster.averages of a m + iprint(substitute(obj), "split by", split_by) + if (absolute.thr) iprint("In case of the absolute threshold, only the returned values are correct, the plot annotations are not!") + + if (plot.UMAP.too) qUMAP(obj = obj, feature = col_name) + + df.summary <- + obj@meta.data |> + select_at(c(col_name, split_by)) |> + group_by_at(split_by) |> + summarize( + "nr.cells" = n(), + "mean" = mean(!!sym(col_name), na.rm = TRUE), + "SEM" = sem(!!sym(col_name), na.rm = TRUE), + "median" = median(!!sym(col_name), na.rm = TRUE), + "SE.median" = 1.2533 * sem(!!sym(col_name), na.rm = TRUE) + ) + + if (simplify) { + av.score <- df.summary[[stat]] + names(av.score) <- if (!isFALSE(prefix.cl.names)) ppp("cl", df.summary[[1]]) else df.summary[[1]] + av.score <- sortbyitsnames(av.score) + if (scale.zscore) av.score <- (scale(av.score)[, 1]) + + cutoff <- if (absolute.thr) absolute.thr else quantile(av.score, quantile.thr) + cutoff.low <- if (absolute.thr) NULL else quantile(av.score, (1 - quantile.thr)) + + iprint("quantile.thr:", quantile.thr) + if (plotit) { + if (histogram) { + p <- ggExpress::qhistogram( + vec = as.numeric(av.score), save = FALSE, + vline = cutoff, + plotname = ppp(title, quantile.thr), + bins = nbins, + subtitle = paste(subtitle, "| median in blue/dashed"), + ylab = ylab.text, + xlab = xlb # Abused + , xlab.angle = 45 + # , ylim = c(-1,1) + , ... + # , ext = "png", w = 7, h = 5 + ) + geom_vline(xintercept = cutoff.low, lty = 2) + print(p) + title_ <- ppp(title, suffix, flag.nameiftrue(scale.zscore)) + ggExpress::qqSave(ggobj = p, title = title_, ext = "png", w = width, h = height) + } else { + p <- ggExpress::qbarplot( + vec = av.score, save = FALSE, + hline = cutoff, + plotname = title, + suffix = quantile.thr, + subtitle = subtitle, + ylab = ylab.text, + xlab = xlb # Abused + , xlab.angle = 45 + # , ylim = c(-1,1) + , ... + # , ext = "png", w = 7, h = 5 + ) + geom_hline(yintercept = cutoff.low, lty = 2) + + print(p) + title_ <- ppp(title, suffix, flag.nameiftrue(scale.zscore)) + qqSave(ggobj = p, title = title_, fname = ppp(title_, split_by, "png"), w = width, h = height) + } + } + + if (report) print(paste0(col_name, ": ", paste(iround(av.score), collapse = " vs. "))) + if (filter == "below") { + return(filter_LP(av.score, threshold = cutoff, plot.hist = FALSE)) + } else if (filter == "above") { + return(filter_HP(av.score, threshold = cutoff, plot.hist = FALSE)) + } else { + return(av.score) + } + } else if (return.plot) { # if /not/ simplify + return(p) + } else { + return(df.summary) + } +} + + + + + + +# _________________________________________________________________________________________________ +#' @title plot.expression.rank.q90 +#' +#' @description Plot gene expression based on the expression at the 90th quantile +#' (so you will not lose genes expressed in few cells). +#' @param obj Seurat object, Default: `combined.obj` +#' @param gene gene of interest, Default: 'ACTB' +#' @param filterZero Remove genes whose quantile-90 expression in 0? Default: `TRUE`. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' plot.expression.rank.q90(gene = "SATB2") +#' } +#' } +#' @importFrom Stringendo percentage_formatter +#' @importFrom MarkdownReports whist +#' +#' @export plot.expression.rank.q90 +plot.expression.rank.q90 <- function(obj = combined.obj, gene = "ACTB", filterZero = TRUE) { + expr.GOI <- obj@misc$expr.q90[gene] + expr.all <- unlist(obj@misc$expr.q90) + gene.found <- gene %in% names(expr.all) + stopifnot(gene.found) + + if (expr.GOI == 0) iprint(gene, "is not expressed. q90-av.exp:", expr.GOI) else if (expr.GOI < 0.05) iprint(gene, "is lowly expressed. q90-av.exp:", expr.GOI) + if (filterZero) { + iprint("Zero 'q90 expression' genes (", pc_TRUE(expr.all == 0), ") are removed.") + expr.all <- expr.all[expr.all > 0] + } + counts <- sum(obj@assays$RNA@counts[gene, ]) + if (expr.GOI == 0) { + quantile.GOI <- 0 + title <- paste(gene, "is too lowly expressed: q90-av.exp is zero. \n There are", counts, "counts.") + } else { + pos.GOI <- which(names(expr.all) == gene) + quantile.GOI <- ecdf(expr.all)(expr.all)[pos.GOI] + title <- paste( + gene, "is in the", Stringendo::percentage_formatter(quantile.GOI), + "quantile of 'q90-av' expression. \n There are", counts, "counts" + ) + } + suppressWarnings( + MarkdownReports::whist(expr.all, + vline = expr.GOI, breaks = 100, main = title, plotname = make.names(title), + ylab = "Genes", + xlab = "Av. mRNA in the 10% top expressing cells (q90 av.exp.)" + ) + ) +} + + + + +# _________________________________________________________________________________________________ +# Interacting with the environment ______________________________ ---- +# _________________________________________________________________________________________________ +# Subsetting, downsampling and manipulating the Seurat object + + + +# _________________________________________________________________________________________________ +#' @title set.mm +#' +#' @description Helps to find metadata columns. It creates a list with the names of of 'obj@meta.data'. +#' @param obj Seurat object, Default: `combined.obj` +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' set.mm() +#' mm +#' } +#' } +#' @export +set.mm <- function(obj = combined.obj) { + mm <- CodeAndRoll2::list.fromNames(colnames(obj@meta.data)) + assign(x = "mm", value = mm, envir = as.environment(1)) +} + + +# _________________________________________________________________________________________________ +#' @title Get the First Seurat Object from a List of Seurat Objects +#' +#' @description #' If provided with a list of Seurat objects, this function returns the first +#' Seurat object in the list. If the input is a single Seurat object, it returns +#' the object itself. It is assumed that all elements of the list are Seurat +#' objects if the input is a list. +#' +#' @param obj A Seurat object, a list of Seurat objects, or any other list. +#' +#' @return The first Seurat object from the list or the Seurat object itself. +#' If the input is not a Seurat object or a list containing at least one Seurat +#' object, the function will throw an error. +#' @export +ww.get.1st.Seur.element <- function(obj) { + if (is(obj)[1] == "list") { + iprint("A list of objects is provided, taking the 1st from", length(obj), "elements.") + obj <- obj[[1]] + } + stopifnot(is(obj) == "Seurat") + return(obj) +} +# ww.get.1st.Seur.element(ls.Seurat[[1]]) + + +# _________________________________________________________________________________________________ +#' @title Recall all.genes global variable from a Seurat object +#' +#' @description all.genes set by calc.q99.Expression.and.set.all.genes() # +#' @param obj Seurat object, Default: `combined.obj` +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' recall.all.genes() +#' all.genes +#' } +#' } +#' @importFrom MarkdownHelpers ww.assign_to_global +#' +#' @export +recall.all.genes <- function(obj = combined.obj, overwrite = FALSE) { + obj <- ww.get.1st.Seur.element(obj) + + if ("all.genes" %in% names(obj@misc)) { + if (!exists("all.genes") | overwrite) { + all.genes <- obj@misc$all.genes + print(head(unlist(all.genes))) + MarkdownHelpers::ww.assign_to_global(name = "all.genes", value = all.genes, verbose = FALSE) + message("all.genes is now (re)defined in the global environment.") + } else { + message(" -> Variable 'all.genes' exits in the global namespace, and overwrite is: FALSE") + } + } else { + message(" -> Slot 'all.genes' does not exist in obj@misc.") + hits <- grepv(pattern = "expr.", names(obj@misc)) + if (!is.null(hits)) { + message("Found instead (", hits, "). Returning 1st element: ", hits[1]) + all.genes <- obj@misc[[hits[1]]] + MarkdownHelpers::ww.assign_to_global(name = "all.genes", value = as.list(all.genes), verbose = FALSE) + } + } +} + + +# _________________________________________________________________________________________________ +#' @title recall.meta.tags.n.datasets +#' +#' @description Recall meta.tags from obj@misc to "meta.tags" in the global environment. +#' @param obj Seurat object, Default: `combined.obj` +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' recall.n.datasets() +#' n.datasets +#' } +#' } +#' @importFrom MarkdownHelpers ww.assign_to_global +#' +#' @export +recall.meta.tags.n.datasets <- function(obj = combined.obj) { + obj <- ww.get.1st.Seur.element(obj) + + if ("n.datasets" %in% names(obj@misc)) { + if (!exists("n.datasets")) { + n.datasets <- obj@misc$n.datasets + print(head(unlist(n.datasets))) + MarkdownHelpers::ww.assign_to_global(name = "n.datasets", value = n.datasets) + } else { + print(" -> Variable 'n.datasets' already exists in the global namespace.") + } + } else { + print(" -> Slot 'n.datasets' does not exist in obj@misc.") + } + + + if ("meta.tags" %in% names(obj@misc)) { + if (!exists("meta.tags")) { + meta.tags <- obj@misc$meta.tags + print(head(unlist(meta.tags))) + MarkdownHelpers::ww.assign_to_global(name = "meta.tags", value = meta.tags) + } else { + iprint(" -> Variable 'meta.tags' already exists in the global namespace.") + } + } else { + print(" -> Slot 'meta.tags' does not exist in obj@misc.") + } +} + + +# _________________________________________________________________________________________________ +#' @title recall.parameters +#' +#' @description Recall parameters from obj@misc to "p" in the global environment. +#' @param obj Seurat object, Default: `combined.obj` +#' @param overwrite Overwrite already existing in environment? Default: `FALSE`. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' recall.parameters() +#' p +#' } +#' } +#' @importFrom MarkdownHelpers ww.assign_to_global +#' +#' @export +recall.parameters <- function(obj = combined.obj, overwrite = FALSE) { + obj <- ww.get.1st.Seur.element(obj) + + if ("p" %in% names(obj@misc)) { + p_found <- exists("p", envir = .GlobalEnv) + if (p_found) message(" -> Variable 'p' exits in the global namespace.") + + if (!p_found | (p_found & overwrite == TRUE)) { + MarkdownHelpers::ww.assign_to_global(name = "p", value = obj@misc$"p", verbose = F) + message("p is now (re)defined in the global environment.") + } else { + message("p not overwritten.") + } + } else { + message(" -> Slot 'p' does not exist in obj@misc.") + } +} + + + +# _________________________________________________________________________________________________ +#' @title recall.genes.ls +#' +#' @description Recall genes.ls from obj@misc to "genes.ls" in the global environment. +#' @param obj Seurat object, Default: `combined.obj` +#' @param overwrite Overwrite already existing in environment? Default: `FALSE`. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' recall.genes.ls() +#' genes.ls +#' } +#' } +#' @importFrom MarkdownHelpers ww.assign_to_global +#' +#' @export + +recall.genes.ls <- function(obj = combined.obj, overwrite = FALSE) { # genes.ls + obj <- ww.get.1st.Seur.element(obj) + + if ("genes.ls" %in% names(obj@misc)) { + if (!exists("genes.ls")) message("variable 'genes.ls' exits in the global namespace: ", head(p)) + + if (!exists("genes.ls") | (exists("genes.ls") & overwrite == TRUE)) { + MarkdownHelpers::ww.assign_to_global(name = "genes.ls", value = obj@misc$"genes.ls") + message("Overwritten.") + } else { + message("Not overwritten.") + } + } else { + message(" -> Slot 'genes.ls' does not exist in obj@misc.") + } +} + + +# _________________________________________________________________________________________________ +#' @title Save Parameters to Seurat Object +#' +#' @description Stores a list of parameters within the `@misc$p` slot of a Seurat object, +#' allowing for easy reference and tracking of analysis parameters used. +#' +#' @param obj Seurat object to update; Default: `combined.obj`. +#' @param params List of parameters to save; Default: `p`. +#' @param overwrite Logical indicating if existing parameters should be overwritten; Default: `TRUE`.. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' save.parameters(obj = combined.obj, params = p) +#' } +#' } +#' +#' @export +save.parameters <- function(obj = combined.obj, params = p, overwrite = TRUE) { + obj <- ww.get.1st.Seur.element(obj) + + if (!is.null(obj@misc$"p") && overwrite) { + print("Overwriting already existing obj@misc$p. Old version:") + print(head(unlist(obj@misc$"p"))) + obj@misc$p <- params + } else if (is.null(obj@misc$"p")) { + obj@misc$p <- params + } +} + + + +# _________________________________________________________________________________________________ +# List level metadata for ______________________________ ---- +# _________________________________________________________________________________________________ + + +#' @title Create Single-Cell Metadata Object for a collection of Seurat Objects +#' +#' @description This function creates a metadata object to correspond to a list of +#' single-cell experiments, for storing parent level information. +#' It initializes the object with the experiment and project name, and the +#' creation date. The created object is of class 'scMetadata_class'. +#' @param experiment The name of the experiment for which metadata is being created. +#' @param project_ The project information to be associated with the metadata object. +#' This defaults to the current project obtained using Seurat.utils::getProject(). +#' +#' @return An 'scCollectionMetadata_class' object containing the metadata for a collection of experiment. +#' @export +#' +#' @examples +#' sc_meta <- create_scCombinedMeta(experiment = "Experiment1") +create_scCombinedMeta <- function(experiment, project_ = getProject()) { + x <- list( + experiment.corresponding = experiment, + initialized = format(Sys.time(), format = "%Y.%m.%d | %H:%M:%S"), + project = project_ + ) + class(x) <- "scCollectionMetadata_class" + print(x) + return(x) +} + + +# _________________________________________________________________________________________________ +# Merging objects and @misc ______________________________ ---- +# _________________________________________________________________________________________________ + + +#' @title Copy Specified Elements from One Seurat Object's @misc to Another's +#' +#' @description Copies specified elements from the `@misc` slot of one Seurat object to the `@misc` slot +#' of another. It warns if some specified elements are missing in the source object or if elements are +#' overwritten in the destination object, depending on the `overwrite` argument. +#' +#' @param obj.from The source Seurat object from which elements in the `@misc` slot are to be copied. +#' @param obj.to The destination Seurat object to which elements in the `@misc` slot are to be copied. +#' @param elements.needed A vector of strings specifying the names of the elements in the `@misc` slot of +#' `obj.from` that should be copied to `obj.to`. +#' @param overwrite Logical indicating whether to overwrite elements in `obj.to` that already exist. +#' If `TRUE`, existing elements will be overwritten with a warning; if `FALSE`, the function will +#' stop with an error if it tries to copy an element that already exists in `obj.to`. +#' @return Returns the modified destination Seurat object (`obj.to`) with the specified elements +#' added to or updated in its `@misc` slot. +#' @examples +#' # Assuming `obj1` and `obj2` are Seurat objects and you wish to copy specific elements +#' # from obj1 to obj2, possibly overwriting existing elements in obj2 +#' obj2 <- copyMiscElements(obj1, obj2, c("element1", "element2"), overwrite = TRUE) +#' +#' @export +copyMiscElements <- function(obj.from, obj.to, elements.needed, overwrite = TRUE) { + obj.from <- ww.get.1st.Seur.element(obj.from) + + stopifnot( + inherits(obj.from, "Seurat"), + inherits(obj.to, "Seurat") + ) + + # Check for missing elements in obj.to@misc + elements.from <- names(obj.from@misc) + missing <- setdiff(elements.needed, elements.from) + if (length(missing) > 0) { + warning("Missing elements in obj.from@misc: ", paste(missing, collapse = ", "), immediate. = TRUE) + } + + # Check for existing elements in obj.to@misc + elements.already.exisiting <- intersect(elements.needed, names(obj.to@misc)) + if (length(elements.already.exisiting) > 0) { + if (!overwrite) { + stop( + "The following elements already exist in obj.to@misc and 'overwrite' is FALSE: ", + paste(elements.already.exisiting, collapse = ", ") + ) + } else { + warning("Overwriting the following elements in obj.to@misc: ", + paste(elements.already.exisiting, collapse = ", "), + immediate. = TRUE + ) + } + } + + # Copy specified elements from obj.from to obj.to + existingElementsFrom <- intersect(elements.needed, names(obj.from@misc)) + for (element in existingElementsFrom) { + obj.to@misc[[element]] <- obj.from@misc[[element]] + } + iprint("@misc contains: ", names(obj.to@misc)) + + return(obj.to) +} + + +# _________________________________________________________________________________________________ +#' @title Copy Tools Slots from Multiple Seurat Objects +#' +#' @description This function copies the `@tools` slots from a list of Seurat objects into a new slot +#' of a target Seurat object. This allows for the aggregation of tools information from multiple +#' experiments or datasets into a single, consolidated Seurat object. +#' +#' @param ls.obj A list of Seurat objects from which the `@tools` slots will be copied. +#' @param obj.to The target Seurat object to which the `@tools` slots will be added. +#' @param overwrite A logical parameter that is kept for compatibility but not used in this version. +#' Its presence does not affect the function's behavior. +#' @param new.slot The name of the new slot within `obj.to@tools` where the copied `@tools` information +#' will be stored. This allows for the organization of copied tools under a specific label, facilitating +#' easy access and interpretation. +#' @return Returns the modified target Seurat object (`obj.to`) with a new `@tools` slot containing +#' the copied information from the list of Seurat objects. +#' @examples +#' # Assuming `ls.obj` is a list of Seurat objects and `obj.to` is a target Seurat object +#' obj.to <- copyCompleteToolsSlots(ls.obj, obj.to, overwrite = TRUE, new.slot = "per.experiment") +#' @export +copyCompleteToolsSlots <- function(ls.obj, obj.to, overwrite = TRUE, new.slot = "per.experiment") { + stopifnot( + inherits(obj.to, "Seurat"), + all(sapply(ls.obj, inherits, "Seurat")) + ) + + ls.tools <- lapply(ls.obj, function(x) x@tools) + obj.to@tools[[new.slot]] <- ls.tools + + return(obj.to) +} + + + + +# _________________________________________________________________________________________________ +# Subsetting the Seurat object ______________________________ ---- +# _________________________________________________________________________________________________ + + +#' @title Subset a Seurat Object by Identity +#' +#' @description Subsets a Seurat object based on a specified identity column and values. It allows +#' for an optional inversion of the selection. +#' +#' @param obj A Seurat object. Default: `NULL`. +#' @param ident The name of the identity column to use for subsetting. It is recommended to +#' specify this explicitly. Default: First entry from the result of `GetClusteringRuns()`. +#' @param identGroupKeep A vector of cluster values for which cells should be matched and retained. +#' This parameter does not have a default value and must be specified. +#' @param invert A logical indicating whether to invert the selection, keeping cells that do +#' not match the specified identity values. Default: `FALSE`. +#' +#' @return A Seurat object subsetted based on the specified identity and identity values. +#' +#' @examples +#' # Assuming `seurat_obj` is your Seurat object and you want to subset based on cluster 1 +#' subsetted_obj <- subsetSeuObjByIdent( +#' obj = seurat_obj, ident = "your_ident_column", +#' identGroupKeep = c(1), invert = FALSE +#' ) +#' +#' @importFrom tictoc tic toc +#' @export +subsetSeuObjByIdent <- function( + obj = combined.obj, + ident = GetClusteringRuns()[1], + identGroupKeep, + invert = FALSE) { + + tic("subsetSeuObjByIdent") + # Input checks + stopifnot( + "obj must be a Seurat object" = inherits(obj, "Seurat"), + "ident must be a character and exist in obj@meta.data" = is.character(ident) && ident %in% colnames(obj@meta.data), + "identGroupKeep must exist in ident" = all(identGroupKeep %in% unique(obj@meta.data[[ident]])) + ) + + identGroupKeep <- if (invert) { + setdiff(unique(obj@meta.data[[ident]]), identGroupKeep) + } else { + identGroupKeep + } + message( + "ident: ", ident, " | ", length(identGroupKeep), " ID-groups selected: ", kppc(head(identGroupKeep)), + "... | invert: ", invert, "\n" + ) + + idx.cells.pass <- obj@meta.data[[ident]] %in% identGroupKeep + cellz <- colnames(obj)[idx.cells.pass] + + PCT <- percentage_formatter(length(cellz) / ncol(obj)) + message( + PCT, " or ", length(cellz), " cells are selected from ", ncol(obj), + ", using values (max 20): ", kppc(head(identGroupKeep, 20)), ", from ", ident, "." + ) + + x <- subset(x = obj, cells = cellz) + toc() + return(x) +} + + +# _________________________________________________________________________________________________ +#' @title downsampleSeuObj +#' +#' @description Subset a compressed Seurat object and save it in the working directory. +#' @param obj A Seurat object to subset. Default: the i-th element of the list 'ls.Seurat'. +#' @param fractionCells The fraction of the object's data to keep. Default: 0.25. +#' @param nCells If set to a number greater than 1, indicates the absolute number of cells to keep. +#' If FALSE, the function uses 'fractionCells' to determine the number of cells. Default: `FALSE`.. +#' @param seed A seed for random number generation to ensure reproducible results. Default: 1989. +#' @export +#' @importFrom Stringendo percentage_formatter + +downsampleSeuObj <- function(obj = ls.Seurat[[i]], fractionCells = 0.25, nCells = FALSE, + seed = 1989) { + set.seed(seed) + if (isFALSE(nCells)) { + cellIDs.keep <- sampleNpc(metaDF = obj@meta.data, pc = fractionCells) + iprint( + length(cellIDs.keep), "or", Stringendo::percentage_formatter(fractionCells), + "of the cells are kept. Seed:", seed + ) + } else if (nCells > 1) { + nKeep <- min(ncol(obj), nCells) + # print(nKeep) + cellIDs.keep <- sample(colnames(obj), size = nKeep, replace = FALSE) + if (nKeep < nCells) { + iprint( + "Only", nCells, + "cells were found in the object, so downsampling is not possible." + ) + } + } + obj <- subset(x = obj, cells = cellIDs.keep) # downsample + return(obj) +} + +# _________________________________________________________________________________________________ +#' @title downsampleSeuObj.and.Save +#' +#' @description Subset a compressed Seurat Obj and save it in wd. # +#' @param obj Seurat object, Default: ORC +#' @param fraction Fractional size to downsample to. Default: 0.25 +#' @param seed random seed used, Default: 1989 +#' @param min.features Minimum features +#' @param dir Directory to save to. Default: OutDir +#' @param suffix A suffix added to the filename, Default: '' +#' @export +downsampleSeuObj.and.Save <- function( + obj = ORC, fraction = 0.25, seed = 1989, dir = OutDir, + min.features = p$"min.features", suffix = fraction, + nthreads = .getNrCores()) { + obj_Xpc <- downsampleSeuObj(obj = obj, fractionCells = fraction, seed = seed) + nr.cells.kept <- ncol(obj_Xpc) + + # Seurat.utils:::.saveRDS.compress.in.BG(obj = obj_Xpc, fname = ppp(paste0(dir, substitute(obj)), + # suffix, nr.cells.kept, 'cells.with.min.features', min.features,"Rds" ) ) + xsave(obj_Xpc, + suffix = ppp(suffix, nr.cells.kept, "cells.with.min.features", min.features), + nthreads = nthreads, project = getProject(), showMemObject = TRUE, saveParams = FALSE + ) +} + + + +# _________________________________________________________________________________________________ +#' @title Sample max number of Cells From each identity in a Seurat Object +#' +#' @description This function samples a specified maximum number of cells from each identity class +#' in a Seurat object, in the meta.data. It ensures that the sampling does not exceed the total +#' number of cells available per identity. +#' +#' @param obj A Seurat object from which cells are to be sampled. +#' @param ident A character vector specifying the identity class from which cells are to be sampled. +#' @param max.cells A positive integer indicating the maximum number of cells to sample from each identity class. +#' @param verbose Logical indicating if messages about the sampling process should be printed to the console. Defaults to TRUE. +#' @param replacement.thr A numeric value between 0 and 1 indicating the percentage of cells to sample from each identity class. Defaults to 0.05. +#' @param dsample.to.repl.thr Logical indicating if sampling should be done with replacement. Defaults to FALSE. +#' @param plot_stats Logical indicating to plot a barplot. +#' @param seed An integer to set the seed for reproducibility. +#' +#' +#' @return Returns a Seurat object containing only the sampled cells. +#' +#' @details This function checks for the presence of the specified identity class within the object's metadata. +#' If the number of cells within any identity class is less than or equal to the `max.cells` parameter, +#' all cells from that class are retained. Otherwise, a random sample of `max.cells` is taken from the class. +#' The function updates the identity of the cells in the returned Seurat object to reflect the sampled cells. +#' If `verbose` is TRUE, it prints the total number of cells sampled and provides a visual summary of the fraction +#' of cells retained per identity class. +#' +#' @examples +#' # Assuming `seuratObj` is a Seurat object with identities stored in its metadata +#' sampledSeuratObj <- downsampleSeuObjByIdentAndMaxcells(obj = seuratObj, ident = "cellType", max.cells = 100) +#' +#' @importFrom CodeAndRoll2 df.col.2.named.vector +#' +#' @export +#' +downsampleSeuObjByIdentAndMaxcells <- function(obj, + ident = GetNamedClusteringRuns()[1], + max.cells = min(table(obj[[ident]])), + verbose = TRUE, + replacement.thr = 0.05, + dsample.to.repl.thr = (max.cells / ncol(obj)) < replacement.thr, # if less than 5% of cells are sampled, sample with replacement + plot_stats = TRUE, + seed = 1989) { + stopifnot( + "obj must be a Seurat object" = inherits(obj, "Seurat"), + "ident must be a character and exist in obj@meta.data" = is.character(ident) && ident %in% colnames(obj@meta.data), + "max.cells must be a positive integer" = is.numeric(max.cells) && max.cells > 0, + max.cells < ncol(obj) + ) + + data <- CodeAndRoll2::df.col.2.named.vector(obj[[ident]]) + uniqueCategories <- unique(data) + + set.seed(seed) + if (dsample.to.repl.thr) { + max.cells <- round(ncol(obj) * replacement.thr) + msg <- percentage_formatter(replacement.thr, + suffix = paste("of the data or", max.cells, "of cells."), + prefix = "Sampling with replacement to:" + ) + message(msg) + } + + # Sample cells from each identity class + sampledNames <- lapply(uniqueCategories, function(category) { + namesInCategory <- names(data[data == category]) + if (length(namesInCategory) <= max.cells) { + # If the number of cells in the category is less than or equal to max.cells, return all cells + return(namesInCategory) + } else { + return(sample(namesInCategory, max.cells)) + } + }) + + sampledCells <- unlist(sampledNames) + + Idents(obj) <- ident + obj2 <- subset(x = obj, cells = sampledCells) + + subb <- paste0("From ", ncol(obj), " reduced to ", ncol(obj2), " cells.") + message(subb) + + if (verbose) { + message("Total cells sampled: ", length(sampledCells)) + print(table(data)) + + nr_remaining_cells <- orig_cells <- table(data) + nr_remaining_cells[nr_remaining_cells > max.cells] <- max.cells + fr_remaining_per_cluster <- iround(nr_remaining_cells / orig_cells) + print(fr_remaining_per_cluster) + } + if (plot_stats) { + pobj <- qbarplot( + vec = fr_remaining_per_cluster, subtitle = subb, label = fr_remaining_per_cluster, + ylab = "fr. of cells", save = FALSE + ) + print(pobj) + } + return(obj2) +} + +# _________________________________________________________________________________________________ +#' @title Relabel Small Categories / Clusters +#' +#' @description +#' Relabels small categories in a specified metadata column of a Seurat object. Categories with +#' cell counts less than a minimum count are relabeled to a specified label. The function adds +#' a new metadata column with the updated labels. +#' +#' @param obj Seurat object. The Seurat object containing the metadata. +#' @param col_in Character string. Name of the metadata column to process. +#' @param backup_col_name Character string. Name of the new metadata column where to backup the original values. +#' Default: `ppp(col_in, "orig")`. +#' @param min_count Numeric. Minimum number of cells required for a category to be retained. +#' Categories with counts less than this number will be relabeled. Default: 100 +#' @param small_label Character string. Label to assign to small categories. Default: "Other". +#' @param v Logical. If `TRUE`, prints verbose output. Default: `TRUE`. +#' +#' @return Seurat object. The modified Seurat object with the new metadata column added. +#' +#' @examples +#' # Assuming 'seurat_obj' is your Seurat object +#' seurat_obj <- RelabelSmallCategories( +#' obj = seurat_obj, +#' col_in = "cell_type", +#' min_count = 50, +#' small_label = "MinorType", +#' v = TRUE +#' ) +#' + +RelabelSmallCategories <- function(obj, col_in, backup_col_name = ppp(col_in, "orig"), min_count = 100, small_label = "Other", v = T) { + # Input assertions + stopifnot( + inherits(obj, "Seurat"), # Check if obj is a Seurat object + is.character(col_in), length(col_in) == 1, # col_in is a single string + col_in %in% colnames(obj@meta.data), # col_in exists in metadata + is.character(backup_col_name), length(backup_col_name) == 1, # backup_col_name is a single string + is.numeric(min_count), min_count > 0, # min_count is a positive number + is.character(small_label), length(small_label) == 1, # small_label is a single string + is.logical(v), length(v) == 1 # v is a single logical value + ) + + message('backup_col_name: ', backup_col_name) + + categories <- obj@meta.data[[backup_col_name]] <- obj@meta.data[[col_in]] # Extract the specified metadata column + category_counts <- table(categories) # Count occurrences of each category + small_categories <- names(category_counts[category_counts < min_count]) # Identify small categories + new_categories <- as.character(categories) # Copy original categories + new_categories[new_categories %in% small_categories] <- small_label # Relabel small categories + obj@meta.data[[col_in]] <- new_categories # Add new column to metadata + + if (v) { # Verbose output + total_cells <- length(categories) + num_small_categories <- length(small_categories) + num_large_categories <- length(category_counts) - num_small_categories + percent_small_cells <- sum(category_counts[small_categories]) / total_cells * 100 + message(total_cells, " Total cells.") + message(num_small_categories, " Categories relabeled to ", small_label) + message(num_large_categories, " of ", length(category_counts), " Categories retained.") + message(sprintf("Cells in relabeled categories: %d (%.2f%% of total)", sum(category_counts[small_categories]), percent_small_cells)) + } + + return(obj) # Return the modified Seurat object +} + + +# _________________________________________________________________________________________________ +#' @title Remove Residual Small Clusters from a Seurat Object +#' +#' @description Removes clusters containing fewer cells than specified by `max.cells` +#' from a Seurat object. This function is particularly useful after subsetting a dataset, +#' where small, possibly unrepresentative clusters may remain. +#' +#' @param obj Seurat object from which small clusters will be removed; Default: `combined.obj`. +#' @param identitites Vector of clustering identities to examine for small clusters; +#' Default: `GetClusteringRuns(obj)`. +#' @param max.cells Maximum number of cells a cluster can contain to still be considered for removal. +#' Default: The lesser of 0.5% of the dataset or 10 cells. +#' @param plot.removed Logical indicating if a umap of the cells removed should be plotted. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' combined.obj <- removeResidualSmallClusters(obj = combined.obj) +#' } +#' } +#' +#' @export +removeResidualSmallClusters <- function( + obj = combined.obj, + identitites = GetClusteringRuns(obj, pat = "*snn_res.[0-9].[0-9]$")[1:5], + max.cells = max(round((ncol(obj)) / 2000), 5), + plot.removed = TRUE) { + # + META <- obj@meta.data + all.cells <- rownames(META) + + message("max.cells: ", max.cells, " | Scanning over these identities:") + small.clusters <- cells.to.remove <- CodeAndRoll2::list.fromNames(identitites) + + for (i in 1:length(identitites)) { + colX <- identitites[i] + print(colX) + tbl <- table(META[[colX]]) + + small.clusters[[i]] <- which_names(tbl <= max.cells) + cells.to.remove[[i]] <- all.cells[which(META[[colX]] %in% small.clusters[[i]])] + if (length(cells.to.remove[[i]])) { + iprint( + length(cells.to.remove[[i]]), "cells in small clusters:", small.clusters[[i]], + "| Cell counts:", tbl[small.clusters[[i]]] + ) + } + } + + all.cells.2.remove <- unique(unlist(cells.to.remove)) + if (plot.removed) { + SBT <- paste(length(all.cells.2.remove), "cells removed from small clusters across", length(identitites), "identities.") + pobj <- clUMAP( + obj = obj, ident = GetClusteringRuns(obj = obj)[1], sub = SBT, + cells.highlight = all.cells.2.remove + ) + print(pobj) + } + + if (length(all.cells.2.remove)) { + iprint( + ">>> a total of", length(all.cells.2.remove), + "cells are removed which belonged to a small cluster in any of the identities." + ) + } else { + iprint(">>> No cells are removed because belonging to small cluster.") + } + + cells.2.keep <- setdiff(all.cells, all.cells.2.remove) + obj <- subset(x = obj, cells = cells.2.keep) + + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title dropLevelsSeurat +#' +#' @description Drop unused levels from `factor` variables in a Seurat object's meta.data. +#' @param obj A Seurat object. +#' @param verbose Logical. Whether to print a message indicating which levels are being dropped. +#' @param only Character vector. Explicit list of columns to only in the operation. +#' @param exclude Character vector. Names of columns to exclude from the operation.#' +#' +#' @export +dropLevelsSeurat <- function(obj = combined.obj, verbose = TRUE, also.character = FALSE, + only = NULL, exclude = NULL) { + + stopifnot(is(obj, "Seurat")) + META <- obj@meta.data + names.meta <- colnames(obj@meta.data) + + stopifnot(is.logical(verbose), + is.logical(also.character), + is.null(only) | only %in% names.meta, + is.null(exclude) | exclude %in% names.meta + ) + + colclasses <- sapply(META, class) + drop_in_these <- names(colclasses[colclasses %in% "factor"]) + + if (!is.null(only)) drop_in_these <- only + if (!is.null(exclude)) drop_in_these <- setdiff(drop_in_these, exclude) + + if (verbose) message("Dropping levels in ", length(drop_in_these), " identities:\n", + kppc(drop_in_these)) + + for (i in 1:length(drop_in_these)) { + colX <- drop_in_these[i] + META[[colX]] <- droplevels(META[[colX]]) + } + + obj@meta.data <- META + return(obj) +} + + +# ____________________________________________________________________ +#' @title Remove Clusters and Drop Levels from a List of Seurat Objects +#' +#' @description This function removes residual small clusters from specified Seurat objects and +#' drops levels in factor-like metadata. +#' @param ls_obj A list of Seurat objects. +#' @param object_names A character vector containing the names of the Seurat objects to process. +#' Default: names of all objects in the `ls_obj`. +#' @param indices A numeric vector indicating which datasets to process by their position in +#' the `object_names` vector. By default, it processes the second and third datasets. +#' @param ... Additional parameters passed to the `removeResidualSmallClusters` function. +#' +#' @details This function applies `removeResidualSmallClusters` and `dropLevelsSeurat` to +#' the Seurat objects specified by the `indices` in the `object_names`. +#' It operates in place, modifying the input `ls_obj` list. +#' +#' @return The function returns the modified list of Seurat objects. +#' @examples +#' \dontrun{ +#' # Process the 2nd and 3rd datasets +#' removeClustersAndDropLevels(ls_obj, indices = c(2, 3)) +#' } +#' +#' @export +removeClustersAndDropLevels <- function(ls_obj, + object_names = names(ls_obj), + indices = 2:3, ...) { + # + for (index in indices) { + dataset_name <- object_names[index] + obj <- ls_obj[[dataset_name]] + obj <- removeResidualSmallClusters(obj = obj, identitites = GetClusteringRuns(obj), ...) + obj <- dropLevelsSeurat(obj) + ls_obj[[dataset_name]] <- obj + } + return(ls_obj) +} + + + + +# _________________________________________________________________________________________________ +#' @title Remove Cells by Dimension Reduction +#' +#' @description This function applies a cutoff in the specified dimension of a given +#' dimension reduction (UMAP, PCA, or t-SNE) to remove cells. +#' @param reduction A string specifying the dimension reduction technique to be used +#' ('umap', 'pca', or 'tsne'). Default: 'umap'. +#' @param umap_dim An integer specifying which dimension (axis) to apply the cutoff. Default: 1. +#' @param obj A Seurat object. Default: 'combined.obj'. +#' @param cutoff A numerical value indicating the cutoff value for the specified dimension. Default: 0. +#' @param cut_below A logical value indicating whether to remove cells below (TRUE) or +#' above (FALSE) the cutoff line. Default: `TRUE`.. +#' @param only_plot_cutoff Simulate and plot cutoff only. +#' @param ... Any other parameters to be passed to internally called functions. +#' @return A Seurat object with cells removed according to the specified cutoff. +#' @export +removeCellsByUmap <- function( + reduction = "umap", + umap_dim = 1, + obj = combined.obj, + cutoff = 0, + cut_below = TRUE, + only_plot_cutoff = FALSE, + ...) { + # Plot cells + sfx <- if (cut_below) "below" else "above" + p <- clUMAP(obj = obj, save.plot = FALSE, sub = paste0("cutoff ", sfx, ": ", cutoff), ...) + + # Add cutoff line to plot + if (umap_dim == 1) { + p <- p + geom_vline(xintercept = cutoff) + } else if (umap_dim == 2) { + p <- p + geom_hline(yintercept = cutoff) + } + print(p) + qqSave(p, fname = kpp("UMAP.with.cutoff", umap_dim, sfx, cutoff, "png"), h = 7, w = 7) + + if (!only_plot_cutoff) { + # Retrieve cell embeddings + cell_embedding <- obj@reductions[[reduction]]@cell.embeddings + all_cells <- rownames(cell_embedding) + stopifnot(ncol(cell_embedding) > 0) + embedding_dim_x <- cell_embedding[, umap_dim] + + # Determine cells to remove based on cutoff + cells_to_remove <- if (cut_below) which_names(embedding_dim_x < cutoff) else which_names(embedding_dim_x >= cutoff) + + # Report on cells removed + if (length(cells_to_remove)) { + iprint(">>> A total of", length(cells_to_remove), "cells are removed which fell on UMAP aside cutoff:", cutoff) + } else { + iprint(">>> No cells are removed because of the UMAP dimension cutoff.") + } + + # Subset object to include only cells not removed + cells_to_keep <- setdiff(all_cells, cells_to_remove) + obj <- subset(x = obj, cells = cells_to_keep) + } # only_plot_cutoff + return(obj) +} + + + + +# _________________________________________________________________________________________________ +# Downsampling Lists of Seurat objects ______________________________ ---- +# _________________________________________________________________________________________________ + + +#' @title Downsample a List of Seurat Objects to a Specific Number of Cells +#' +#' @description Downsampling each Seurat object in a list to a specified number of cells. This function is +#' particularly useful for creating smaller, more manageable subsets of large single-cell datasets for +#' preliminary analyses or testing. +#' +#' @param ls.obj List of Seurat objects to be downsampled; Default: `ls.Seurat`. +#' @param NrCells Target number of cells to downsample each Seurat object to. +#' @param save_object Logical indicating whether to save the downsampled Seurat objects using `isaveRDS` +#' or to return them; Default: `FALSE`.. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' downsampledSeuratList <- downsampleListSeuObjsNCells( +#' ls.obj = +#' list(yourSeuratObj1, yourSeuratObj2), NrCells = 2000 +#' ) +#' downsampledSeuratList <- downsampleListSeuObjsNCells(NrCells = 200) +#' } +#' } +#' +#' @export +#' @importFrom tictoc tic toc +#' @importFrom Stringendo percentage_formatter +#' @importFrom foreach foreach %dopar% getDoParRegistered + +downsampleListSeuObjsNCells <- function( + ls.obj = ls.Seurat, NrCells = p$"dSample.Organoids", + save_object = FALSE) { + # Check if 'ls_obj' is a list of Seurat objects and 'obj_IDs' is a character vector of the same length + if (!is.list(ls.obj) & inherits(ls.obj, "Seurat")) ls.obj <- list(ls.obj) + stopifnot(is.list(ls.obj) & all(sapply(ls.obj, function(x) inherits(x, "Seurat")))) + + names.ls <- names(ls.obj) + n.datasets <- length(ls.obj) + iprint(NrCells, "cells") + + tictoc::tic("downsampleListSeuObjsNCells") + if (foreach::getDoParRegistered()) { + ls.obj.downsampled <- foreach::foreach(i = 1:n.datasets) %dopar% { + iprint(names(ls.obj)[i], Stringendo::percentage_formatter(i / n.datasets, digitz = 2)) + downsampleSeuObj(obj = ls.obj[[i]], nCells = NrCells) + } + names(ls.obj.downsampled) <- names.ls + } else { + ls.obj.downsampled <- list.fromNames(names.ls) + for (i in 1:n.datasets) { + iprint(names(ls.obj)[i], Stringendo::percentage_formatter(i / n.datasets, digitz = 2)) + ls.obj.downsampled[[i]] <- downsampleSeuObj(obj = ls.obj[[i]], nCells = NrCells) + } + } # else + tictoc::toc() + + print(head(sapply(ls.obj, ncol))) + print(head(sapply(ls.obj.downsampled, ncol))) + + if (save_object) { + isave.RDS(obj = ls.obj.downsampled, suffix = ppp(NrCells, "cells"), inOutDir = TRUE) + } else { + return(ls.obj.downsampled) + } +} + + + +# _________________________________________________________________________________________________ +#' @title Downsample a List of Seurat Objects to a Fraction +#' +#' @description Downsampling a list of Seurat objects to a specified fraction of their original size. +#' This is useful for reducing dataset size for quicker processing or testing workflows. +#' +#' @param ls.obj List of Seurat objects to be downsampled; Default: `ls.Seurat`. +#' @param fraction Fraction of cells to retain in each Seurat object; Default: 0.1. +#' @param save_object Logical indicating whether to save the downsampled Seurat objects using +#' `isaveRDS` or return them; Default: `FALSE`.. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' downsampled_objs <- downsampleListSeuObjsPercent(ls.obj = yourListOfSeuratObjects, fraction = 0.1) +#' } +#' } +#' +#' @export +#' @importFrom tictoc tic toc +#' @importFrom Stringendo percentage_formatter +#' @importFrom foreach foreach %dopar% getDoParRegistered +#' +downsampleListSeuObjsPercent <- function( + ls.obj = ls.Seurat, + fraction = 0.1, + seed = 1989, + save_object = FALSE) { + # Check if 'ls_obj' is a list of Seurat objects and 'obj_IDs' is a character vector of the same length + if (!is.list(ls.obj) & inherits(ls.obj, "Seurat")) ls.obj <- list(ls.obj) + stopifnot(is.list(ls.obj) & all(sapply(ls.obj, function(x) inherits(x, "Seurat")))) + + names.ls <- names(ls.obj) + n.datasets <- length(ls.obj) + iprint(fraction, "fraction") + + tictoc::tic("downsampleListSeuObjsPercent") + if (foreach::getDoParRegistered()) { + ls.obj.downsampled <- foreach::foreach(i = 1:n.datasets) %dopar% { + downsampleSeuObj(obj = ls.obj[[i]], fractionCells = fraction) + } + names(ls.obj.downsampled) <- names.ls + } else { + ls.obj.downsampled <- list.fromNames(names.ls) + for (i in 1:n.datasets) { + cells <- round(ncol(ls.obj[[1]]) * fraction) + iprint(names(ls.obj)[i], cells, "cells=", Stringendo::percentage_formatter(i / n.datasets, digitz = 2)) + ls.obj.downsampled[[i]] <- downsampleSeuObj(obj = ls.obj[[i]], fractionCells = fraction, seed = seed) + } + } + tictoc::toc() # else + + NrCells <- sum(sapply(ls.obj, ncol)) + + print(head(sapply(ls.obj, ncol))) + print(head(sapply(ls.obj.downsampled, ncol))) + if (save_object) { + isave.RDS(obj = ls.obj.downsampled, suffix = ppp(NrCells, "cells"), inOutDir = TRUE) + } else { + return(ls.obj.downsampled) + } +} + + +# _________________________________________________________________________________________________ +# DGEA ______________________________ ---- +# _________________________________________________________________________________________________ + + +# _________________________________________________________________________________________________ +#' @title Add.DE.combined.score +#' +#' @description Add a combined score to differential expression (DE) results. The score is +#' calculated as log-fold change (LFC) times negative logarithm of scaled +#' p-value (LFC * -log10( p_cutoff / pval_scaling )). +#' @param df A data frame that holds the result of a differential gene expression analysis, +#' typically obtained via the 'FindAllMarkers' function. Default: df.markers. +#' @param p_val_min The minimum p-value considered. All values below this threshold are set to +#' this value. Default: 1e-25. +#' @param pval_scaling The value to scale p-values by in the calculation of the combined score. Default: 0.001. +#' @param colP The name of the column in the input data frame that holds p-values. Default: 'p_val'. +#' @param colLFC The name of the column in the input data frame that holds log-fold change values. +#' By default, it selects the first column not named "avg_logFC" or "avg_log2FC". +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' df.markers <- Add.DE.combined.score(df.markers) +#' } +#' } +#' @export +Add.DE.combined.score <- function( + df = df.markers, p_val_min = 1e-25, pval_scaling = 0.001, colP = "p_val", + colLFC = CodeAndRoll2::grepv(pattern = c("avg_logFC|avg_log2FC"), x = colnames(df), perl = TRUE) + # , colLFC = "avg_log2FC" + ) { # Score = -LOG10(p_val) * avg_log2FC + p_cutoff <- SmallestNonAboveX(vec = df[[colP]], X = p_val_min) + df$"combined.score" <- round(df[[colLFC]] * -log10(p_cutoff / pval_scaling)) + return(df) +} + + + + +# _________________________________________________________________________________________________ +#' @title Save Top 25 Markers per Cluster +#' +#' @description Stores the top 25 markers for each cluster identified in a Seurat object, based on +#' the `avg_log2FC` from the output table of `FindAllMarkers()`. The result is saved under `@misc$df.markers$res...`, +#' rounding insignificant digits to three decimal places. +#' +#' @param obj Seurat object to update with top 25 markers information; Default: `combined.obj`. +#' @param df_markers Data frame containing results from differential gene expression analysis +#' via `FindAllMarkers()`, specifying significant markers across clusters; Default: `df.markers`. +#' @param res Clustering resolution at which the markers were identified; Default: 0.5. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' combined.obj <- StoreTop25Markers(obj = combined.obj, df_markers = df.markers, res = 0.5) +#' } +#' } +#' +#' @seealso \code{\link[Seurat]{FindAllMarkers}}, \code{\link[dplyr]{top_n}} +#' +#' @export +#' @importFrom Seurat FindAllMarkers +#' @importFrom dplyr group_by top_n select arrange + +StoreTop25Markers <- function( + obj = combined.obj, + df_markers = df.markers, res = 0.5) { + top25.markers <- + df_markers |> + group_by(cluster) |> + top_n(n = 25, wt = avg_2logFC) |> + dplyr::select(gene) |> + col2named.vec.tbl() |> + splitbyitsnames() + + obj@misc$"top25.markers"[[ppp("res", res)]] <- top25.markers + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title Store All Differential Expression Markers +#' +#' @description Saves the complete output table from `FindAllMarkers()` to a Seurat object, facilitating +#' easy access to differential expression analysis results. This function rounds numerical values to a +#' specified number of digits to maintain readability and manage file sizes. +#' +#' @param obj Seurat object to update with differential expression markers; Default: `combined.obj`. +#' @param df_markers Data frame containing the results from differential gene expression analysis +#' (`FindAllMarkers()` output); Default: `df.markers`. +#' @param res Clustering resolution identifier for storing and referencing the markers; Default: 0.5. +#' @param digit Number of significant digits to retain in numerical values; Default: 3. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' combined.obj <- StoreAllMarkers(obj = combined.obj, df_markers = df.markers, res = 0.5) +#' } +#' } +#' +#' @export +StoreAllMarkers <- function( + obj = combined.obj, + df_markers = df.markers, res = 0.5, digit = c(0, 3)[2]) { + if (digit) df_markers[, 1:5] <- signif(df_markers[, 1:5], digits = digit) + obj@misc$"df.markers"[[ppp("res", res)]] <- df_markers + iprint("DF markers are stored under:", "obj@misc$df.markers$", ppp("res", res)) + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title Get Top Differential Expression Genes Data Frame +#' +#' @description Retrieves a data frame of the top N differentially expressed genes from +#' differential gene expression analysis results, offering an option to exclude certain genes +#' based on patterns. +#' +#' @param dfDE Data frame containing the results of differential gene expression analysis +#' (e.g., output from `FindAllMarkers()`); Default: `df.markers`. +#' @param n Number of top markers to retrieve per cluster; Default: `p$n.markers`. +#' @param order.by Priority column for sorting markers before selection, such as `"avg_log2FC"`; +#' Default: `"avg_log2FC"`. +#' @param exclude Vector of regex patterns to exclude genes from the top markers list; +#' Default: `c("^AL*|^AC*|^LINC*")`. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' topMarkersDF <- GetTopMarkersDF(dfDE = df.markers, n = 3) +#' } +#' } +#' +#' @seealso \code{\link[Seurat]{FindAllMarkers}}, \code{\link[dplyr]{arrange}}, +#' \code{\link[dplyr]{filter}}, \code{\link[dplyr]{group_by}} +#' +#' @export +#' @importFrom dplyr arrange group_by slice select filter + +GetTopMarkersDF <- function( + dfDE = df.markers, + n = p$"n.markers", order.by = c("avg_log2FC", "p_val_adj")[1], + exclude = c("^A[CFLP][0-9]{6}", "^Z[0-9]{5}", + "^LINC0[0-9]{4}", "^C[1-9]+orf[1-9]+", + "[-|\\.]AS[1-9]*$", "[-|\\.]DT[1-9]*$", + "^MIR[1-9]", "^SNHG[1-9]") + ) { + "Works on active Idents() -> thus we call cluster" + combined_pattern <- paste(exclude, collapse = "|") + + TopMarkers <- dfDE |> + dplyr::filter(!grepl(combined_pattern, gene, perl = TRUE)) |> + arrange(desc(!!as.name(order.by))) |> + dplyr::group_by(cluster) |> + dplyr::slice(1:n) |> + dplyr::select(cluster, gene, avg_log2FC) + + return(TopMarkers) +} + + +# _________________________________________________________________________________________________ +#' @title Get Top Differential Expression Markers from DGEA Results +#' +#' @description Retrieves the top N differentially expressed genes from the results of a differential +#' gene expression analysis, such as that provided by `FindAllMarkers()`. +#' +#' @param dfDE Data frame containing differential expression analysis results; Default: `df.markers`. +#' @param n Number of top markers to retrieve for each cluster; Default: `p$n.markers`. +#' @param order.by Column by which to sort the markers before selection, typically prioritizing +#' markers by significance or effect size; Default: `"avg_log2FC"`. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' topMarkers <- GetTopMarkers(df = df.markers, n = 3) +#' } +#' } +#' +#' @seealso \code{\link[Seurat]{FindAllMarkers}}, \code{\link[dplyr]{arrange}}, \code{\link[dplyr]{group_by}} +#' +#' @export +#' @importFrom dplyr arrange group_by slice select + +GetTopMarkers <- function(dfDE = df.markers, + n = p$"n.markers", + order.by = c("combined.score", "avg_log2FC", "p_val_adj")[2]) { + message("Works on active Idents()") # thus we call cluster + TopMarkers <- dfDE |> + dplyr::arrange(desc(!!as.name(order.by))) |> + dplyr::group_by(cluster) |> + dplyr::slice(1:n) |> + dplyr::select(gene) |> + CodeAndRoll2::col2named.vec.tbl() + + return(TopMarkers) +} + + + + +# _________________________________________________________________________________________________ +#' @title AutoLabelTop.logFC +#' +#' @description Create a new "named identity" column in the metadata of a Seurat object, +#' with `Ident` set to a clustering output matching the `res` parameter of the function. +#' It requires the output table of `FindAllMarkers()`. If you used `StoreAllMarkers()` +#' is stored under `@misc$df.markers$res...`, which location is assumed by default. +#' @param obj A Seurat object, with default value `combined.obj`. +#' @param group.by The clustering group to be used, defaults to the first entry by +#' `GetClusteringRuns()`. +#' @param res Clustering resolution tag. Default: extracted from `group.by`. +#' @param plot.top.genes Logical indicating whether to show a plot, default is `TRUE`. +#' @param suffix Suffix for the naming, defaults to the value of `res`. +#' @param order.by Sorting criterion for the output tibble, defaults to the second element +#' of `c("combined.score", "avg_log2FC", "p_val_adj")`. +#' @param exclude A vector of regular expressions to specify genes to exclude, with +#' default value `c("^AL*|^AC*|^LINC*|^C[0-9]+orf[0-9]*")`. +#' @param df_markers Data frame resulting from DGEA analysis (`FindAllMarkers`). The default +#' is `combined.obj@misc$df.markers[[paste0("res.", res)]]`. +#' @param plotEnrichment Logical indicating whether to plot enrichment, default is `TRUE`. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' combined.obj <- AutoLabelTop.logFC() +#' combined.obj$"cl.names.top.gene.res.0.5" +#' } +#' } + +#' @export +AutoLabelTop.logFC <- function( + obj = combined.obj, + group.by, + res = stringr::str_extract(group.by, "\\d+\\.\\d+"), + plot.top.genes = TRUE, + suffix = res, + order.by = c("combined.score", "avg_log2FC", "p_val_adj")[2], + exclude = c("^A[CFLP][0-9]{6}", "^Z[0-9]{5}", + "^LINC0[0-9]{4}", "^C[1-9]+orf[1-9]+", + "[-|\\.]AS[1-9]*$", "[-|\\.]DT[1-9]*$", + "^MIR[1-9]", "^SNHG[1-9]"), + df_markers = obj@misc$"df.markers"[[paste0("res.", res)]], + plotEnrichment = TRUE) { + + message(group.by) + message(" > Running AutoLabelTop.logFC...") + + stopifnot( + !is.null("df_markers"), + order.by %in% colnames(df_markers) + ) + + df.top.markers <- GetTopMarkersDF(dfDE = df_markers, order.by = order.by, n = 1, exclude = exclude) + + # Enrichment plot ______________________________________________________________ + if (plotEnrichment) { + top_log2FC <- df.top.markers$"avg_log2FC" + names(top_log2FC) <- ppp(df.top.markers$"cluster", df.top.markers$"gene") + ggExpress::qbarplot(top_log2FC, plotname = "The strongest fold change by cluster", + label = iround(top_log2FC), + subtitle = group.by, + ylab = "avg_log2FC", xlab = "clusters", + hline = 2, + suffix = group.by + ) + } + + top.markers <- col2named.vec.tbl(df.top.markers[, 1:2]) + + obj@misc[[ppp("top.markers.res", res)]] <- top.markers + + ids_CBC <- deframe(obj[[group.by]]) + ids <- unique(ids_CBC) + + # Check if all clusters have DE-genes ____________________________________________________ + if (length(ids) != length(top.markers)) { + warning("Not all clusters returned DE-genes!", immediate. = TRUE) + missing <- setdiff(ids, names(top.markers)) + names(missing) <- missing + iprint("missing:", missing) + top.markers <- sortbyitsnames(c(top.markers, missing)) + } + + top.markers.ID <- ppp(names(top.markers), top.markers) + names(top.markers.ID) <- names(top.markers) + named.group.by <- top.markers.ID[ids_CBC] + + # Check if the clustering was ordered _____________________________________________________ + sfx.ord <- ifelse(grepl("ordered", group.by), group.by, "") + namedIDslot <- sppp("cl.names.top.gene.", sfx.ord) + + obj <- addMetaDataSafe(obj = obj, metadata = as.character(named.group.by), col.name = namedIDslot, overwrite = TRUE) + if (plot.top.genes) multiFeaturePlot.A4(list.of.genes = top.markers, suffix = suffix, obj = obj) + + return(obj) +} + + + + + + +# _________________________________________________________________________________________________ +#' @title AutoLabel.KnownMarkers +#' +#' @description Creates a new "named identity" column in the metadata of a Seurat object, +#' setting 'Ident' to a clustering output matching the 'res' parameter. +#' This function requires the output table of `FindAllMarkers()`. +#' If you used `StoreAllMarkers()`, the output is stored under `@misc$df.markers$res...`, +#' which is the default location. +#' @param obj A Seurat object to work with. Default: `combined.obj`. +#' @param topN The top 'N' genes to consider. Default: 1. +#' @param res The clustering resolution to use. Default: 0.5. +#' @param KnownMarkers A character vector containing known marker genes to be used for annotation. +#' Default: `c("TOP2A", "EOMES", "SLA", "HOPX", "S100B", "DLX6-AS1", "POU5F1", "SALL4", "DDIT4",` +#' `"PDK1", "SATB2", "FEZF2")`. +#' @param order.by Specifies the column to sort the output tibble by. +#' Default: 'combined.score' (First among "combined.score", "avg_log2FC", "p_val_adj"). +#' @param df_markers The data frame of markers. By default, it is stored under +#' `@misc$df.markers$res...` in the provided Seurat object. +#' Default: `combined.obj@misc$df.markers[[paste0("res.", res)]]`. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' combined.obj <- AutoLabel.KnownMarkers() +#' DimPlot.ClusterNames(ident = "cl.names.KnownMarkers.0.5") +#' } +#' } +#' @seealso +#' \code{\link[dplyr]{select}}, \code{\link[dplyr]{slice}} +#' @export +#' @importFrom dplyr select slice +AutoLabel.KnownMarkers <- function( + obj = combined.obj, topN = 1, res = 0.5, + KnownMarkers = c( `dl-EN` = "KAZN", `ul-EN` = "SATB2", `Immature neurons` = "SLA" + , Interneurons = "DLX6-AS1", Interneurons = "ERBB4", InterN_CGE = "SCGN" + , `Intermediate progenitor` = "EOMES" + , `S-phase` = "TOP2A", `G2M-phase` = "H4C3" # formerly: HIST1H4C + , `oRG` = "HOPX" , Astrocyte = "S100B" + , `Hypoxia/Stress` = "DDIT4", Glycolytic = "PDK1" + , `Choroid.Plexus` = "TTR", `Low-Quality` = "POLR2A" + , `Mesenchyme` = "DCN", `Choroid.Plexus` = "TTR" + ), + order.by = c("combined.score", "avg_log2FC", "p_val_adj")[1], + df_markers = obj@misc$"df.markers"[[paste0("res.", res)]]) { + stopifnot(!is.null("df_markers")) + + lfcCOL <- CodeAndRoll2::grepv(pattern = c("avg_logFC|avg_log2FC"), x = colnames(df_markers), perl = TRUE) + keep <- unique(c(lfcCOL, "p_val_adj", "cluster", order.by, "gene")) + + + matching.clusters <- + df_markers |> + dplyr::select(keep) |> + arrange(desc(!!as.name(order.by))) |> + dplyr::filter(gene %in% KnownMarkers) |> + group_by(gene) |> + dplyr::slice(1:topN) |> + arrange(desc(!!as.name(order.by))) |> + # top_n(n = 1, wt = avg_log2FC) |> # Select the top cluster for each gene + arrange(cluster) + + print(matching.clusters) + + unique.matches <- + matching.clusters |> + group_by(cluster) |> # Select rows with unique values based on column "cluster" + distinct(cluster, .keep_all = TRUE) |> + dplyr::select(gene) + + print("Best matches:") + print(unique.matches) + + "Error Here" + "Error Here" + "Error Here" + "Error Here" + "Error Here" + "Error Here" + + top.markers.df <- GetTopMarkersDF(dfDE = df_markers, order.by = lfcCOL, n = 1) + top.markers <- top.markers.df |> col2named.vec.tbl() + + missing.annotations <- + top.markers.df |> + dplyr::filter(!cluster %in% unique.matches$cluster) # filter for clusters that do not have a unique label already + + named.annotations <- + rbind(unique.matches, missing.annotations) |> # merge the 2 df's + arrange(cluster) |> + CodeAndRoll2::col2named.vec.tbl() + + (top.markers.ID <- ppp(names(named.annotations), named.annotations)) + names(top.markers.ID) <- names(top.markers) + named.ident <- top.markers.ID[Idents(object = obj)] + + namedIDslot <- ppp("cl.names.KnownMarkers", res) + obj[[namedIDslot]] <- named.ident + return(obj) +} + + + +# _________________________________________________________________________________________________ +# Correlations _________________________ ---- +# _________________________________________________________________________________________________ + +#' @title Calculate Sparse Correlation Matrix +#' +#' @description Computes a sparse correlation matrix from a given sparse matrix input. This function is +#' useful for efficiently handling large datasets where most values are zero, facilitating the calculation +#' of both covariance and correlation matrices without converting to a dense format. +#' +#' @param smat A sparse matrix object, typically of class Matrix from the Matrix package. +#' @return A list with two elements: +#' * `cov`: The covariance matrix derived from the input sparse matrix. +#' * `cor`: The correlation matrix derived from the covariance matrix. +#' +#' @examples +#' \dontrun{ +#' library(Matrix) +#' smat <- Matrix(rnorm(1000), nrow = 100, sparse = TRUE) +#' cor_res <- sparse.cor(smat) +#' print(cor_res$cor) +#' } +#' +#' @export +#' @importFrom Matrix colMeans crossprod tcrossprod +#' @importFrom stats sd +sparse.cor <- function(smat) { + n <- nrow(smat) + cMeans <- colMeans(smat) + covmat <- (as.matrix(crossprod(smat)) - n * tcrossprod(cMeans)) / (n - 1) + sdvec <- sqrt(diag(covmat)) + cormat <- covmat / tcrossprod(sdvec) + list(cov = covmat, cor = cormat) +} + + +# _________________________________________________________________________________________________ +#' @title Calc.Cor.Seurat +#' +#' @description Calculate gene correlation on a Seurat object. +#' @param assay.use The assay to use from the Seurat object. Default: 'RNA' +#' @param slot.use The slot to use from the assay in the Seurat object. Default: 'data' +#' @param quantileX The quantile level for the calculation. Default: `0.95` +#' @param max.cells Maximum number of cells to be used in the calculation. Default: `40000` +#' @param seed The random seed used for the calculation. Default: `p$seed` +#' @param digits The number of decimal places to round the correlation and covariance values. Default: `2` +#' @param obj The Seurat object to perform calculations on. Default: `combined.obj` +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' combined.obj <- calc.q99.Expression.and.set.all.genes(combined.obj, quantileX = 0.99, max.cells = 400000, set.all.genes = FALSE) +#' combined.obj <- Calc.Cor.Seurat(assay.use = "RNA", slot.use = "data", digits = 2, obj = combined.obj, quantile = 0.99, max.cells = 40000) +#' } +#' } +#' @importFrom tictoc tic toc +#' +#' @export +Calc.Cor.Seurat <- function( + assay.use = "RNA", + slot.use = "data", + quantileX = 0.95, + max.cells = 40000, + seed = p$"seed", + digits = 2, obj = combined.obj) { + expr.mat <- GetAssayData(slot = slot.use, assay = assay.use, object = obj) + if (ncol(expr.mat) > max.cells) { + set.seed(seed = seed) + cells.use <- sample(x = colnames(expr.mat), size = max.cells) + } else { + cells.use <- ncol(expr.mat) + } + + qname <- paste0("q", quantileX * 100) + quantile_name <- kpp("expr", qname) + + if (is.null(obj@misc[[quantile_name]])) { + iprint( + "Call: combined.obj <- calc.q99.Expression.and.set.all.genes(combined.obj, quantileX =", + quantileX, " first )" + ) + } + genes.HE <- which_names(obj@misc[[quantile_name]] > 0) + iprint("Pearson correlation is calculated for", length(genes.HE), "HE genes with expr.", qname, ": > 0.") + tictoc::tic("sparse.cor") + ls.cor <- sparse.cor(smat = t(expr.mat[genes.HE, cells.use])); tictoc::toc() + ls.cor <- lapply(ls.cor, round, digits = 2) + + slot__name <- kpp(slot.use, assay.use, quantile_name) + obj@misc[[kpp("cor", slot__name)]] <- ls.cor$"cor" + obj@misc[[kpp("cov", slot__name)]] <- ls.cor$"cov" + iprint("Stored under obj@misc$", kpp("cor", slot.use, assay.use), "or cov... .") + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title Plot Gene Correlation Heatmap +#' +#' @description Generates a heatmap visualization of gene correlations based on expression data. +#' Useful for identifying groups of genes that exhibit similar expression patterns across different conditions +#' or cell types in a Seurat object. +#' +#' @param genes Vector of gene symbols to include in the correlation analysis and heatmap. +#' @param assay.use Assay from which to retrieve expression data within the Seurat object; Default: 'RNA'. +#' @param slot.use Specifies which slot of the assay to use for expression data `('data', 'scale.data', 'data.imputed')`; +#' Default: first item `('data')`. +#' @param quantileX Quantile level for calculating expression thresholds; Default: `0.95`. +#' @param min.g.cor Minimum absolute gene correlation value for inclusion in the heatmap; Default: `0.3`. +#' @param calc.COR Logical flag to calculate correlation matrix if not found in `@misc`; Default: `FALSE.` +#' @param cutRows Height at which to cut the dendrogram for rows, determining cluster formation; Default: `NULL.` +#' @param cutCols Height at which to cut the dendrogram for columns, determining cluster formation. +#' Default: same as `cutRows`. +#' @param obj Seurat object containing the data; Default: `combined.obj`. +#' @param ... Additional parameters passed to the internally called functions. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' plot.Gene.Cor.Heatmap(genes = c("Gene1", "Gene2", "Gene3"), obj = combined.obj) +#' } +#' } +#' +#' @importFrom Seurat GetAssayData +#' @importFrom pheatmap pheatmap +#' @importFrom MarkdownReports wplot_save_pheatmap +#' +#' @export plot.Gene.Cor.Heatmap +#' +plot.Gene.Cor.Heatmap <- function( + genes, + assay.use = "RNA", slot.use = c("data", "scale.data", "data.imputed")[1], quantileX = 0.95, + min.g.cor = 0.3, calc.COR = FALSE, + cutRows = NULL, cutCols = cutRows, + obj = combined.obj, ...) { + expr.mat <- GetAssayData(slot = slot.use, assay = assay.use, object = obj) + if (slot.use == c("data.imputed")) { + "WIP" + } + expr.mat <- GetAssayData(slot = slot.use, assay = assay.use, object = obj) + + qname <- paste0("expr.q", quantileX * 100) + slotname_cor.mat <- kpp("cor", slot.use, assay.use, qname) + cor.mat <- obj@misc[[slotname_cor.mat]] + + if (is.null(cor.mat)) { + iprint(slotname_cor.mat, " not found in @misc.") + iprint("Correlation slots present in @misc:", CodeAndRoll2::grepv(names(obj@misc), pattern = "^cor")) + + # Calculate --- --- --- --- --- + if (calc.COR) { + message("Calculating correlation now.") + genes.found <- check.genes(list.of.genes = genes) + message(length(genes.found), " genes are found in the object.") + + if (length(genes.found) > 200) iprint("Too many genes found in data, cor will be slow: ", length(genes.found)) + ls.cor <- sparse.cor(t(expr.mat[genes.found, ])) + cor.mat <- ls.cor$cor + } else { + stop() + } + } else { + print("Correlation is pre-calculated") + genes.found <- intersect(genes, rownames(cor.mat)) + iprint(length(genes.found), "genes are found in the correlation matrix.") + cor.mat <- cor.mat[genes.found, genes.found] + } + + + # Filter --- --- --- --- --- --- + diag(cor.mat) <- NaN + corgene.names <- union( + which_names(rowMax(cor.mat) >= min.g.cor), + which_names(rowMin(cor.mat) <= -min.g.cor) + ) + iprint(length(corgene.names), "genes are more (anti-)correlated than +/-:", min.g.cor) + + pname <- paste0("Pearson correlations of ", substitute(genes), "\n min.cor:", min.g.cor, " | ", assay.use, ".", slot.use) + o.heatmap <- pheatmap::pheatmap(cor.mat[corgene.names, corgene.names], main = pname, cutree_rows = cutRows, cutree_cols = cutCols, ...) + MarkdownReports::wplot_save_pheatmap(o.heatmap, plotname = make.names(pname)) + + # return values + maxCorrz <- rowMax(cor.mat)[corgene.names] + names(maxCorrz) <- corgene.names + dput(maxCorrz) +} + + + + + +# _________________________________________________________________________________________________ +# Seurat.object.manipulations.etc.R ______________________________ ---- +# _________________________________________________________________________________________________ +# source('~/GitHub/Packages/Seurat.utils/Functions/Seurat.object.manipulations.etc.R') +# try (source("https://raw.githubusercontent.com/vertesy/Seurat.utils/master/Functions/Seurat.object.manipulations.etc.R")) + + + +# _________________________________________________________________________________________________ +#' @title Add Prefixes to Cell Names in Seurat Objects +#' +#' @description Adds prefixes derived from a vector of identifiers to cell names in a list of Seurat objects. +#' This is useful for ensuring unique cell names across multiple samples or conditions when combining or comparing datasets. +#' +#' @param ls_obj List of Seurat S4 objects to which prefixes will be added. Each object should correspond +#' to a different sample or condition. +#' @param obj_IDs Character vector of identifiers that will be used as prefixes. Each identifier in the vector +#' corresponds to a Seurat object in `ls_obj`. The length of `obj_IDs` must match the length of `ls_obj`. +#' +#' @examples +#' \dontrun{ +#' # Assuming seurat_obj1 and seurat_obj2 are Seurat objects +#' ls_obj <- list(seurat_obj1, seurat_obj2) +#' obj_IDs <- c("sample1", "sample2") +#' ls_obj_prefixed <- prefix_cells_seurat(ls_obj = ls_obj, obj_IDs = obj_IDs) +#' # Now each cell name in seurat_obj1 and seurat_obj2 will be prefixed with 'sample1_' and 'sample2_', respectively. +#' } +#' +#' @return A list of Seurat objects with updated cell names, incorporating the specified prefixes. +#' +#' @export +#' @importFrom Seurat RenameCells +prefix_cells_seurat <- function(ls_obj, obj_IDs) { + # Check if 'ls_obj' is a list of Seurat objects and 'obj_IDs' is a character vector of the same length + if (!is.list(ls_obj) & inherits(ls_obj, "Seurat")) ls_obj <- list(ls_obj) + stopifnot(is.list(ls_obj) & all(sapply(ls_obj, function(x) inherits(x, "Seurat")))) + stopifnot(is.character(obj_IDs) & length(ls_obj) == length(obj_IDs)) + + names_orig <- names(ls_obj) + + # Iterate over Seurat objects + ls_obj_prefixed <- lapply(seq_along(ls_obj), function(i) { + # Get the Seurat object and corresponding prefix + obj <- ls_obj[[i]] + prefix <- obj_IDs[i] + + # Add prefix to cell names + new_cell_names <- paste0(prefix, "_", colnames(obj)) + + # Rename cells in the Seurat object + obj <- RenameCells(obj, new.names = new_cell_names) + + return(obj) + }) + print(lapply(lapply(ls_obj_prefixed, colnames), head)) + + names(ls_obj_prefixed) <- names_orig + return(ls_obj_prefixed) +} + + +# _________________________________________________________________________________________________ +#' @title Check Prefix in Seurat Object Cell IDs +#' +#' @description This function checks if a prefix has been added to the standard +#' cell-IDs (16 characters of A,TRUE,C,G) in a Seurat object. If so, it prints the number of unique prefixes found, +#' issues a warning if more than one unique prefix is found, and returns the identified prefix(es). +#' +#' @param obj A Seurat object with cell IDs possibly prefixed. +#' @param cell_ID_pattern Pattern to match cellIDs (with any suffix). +#' @return A character vector of the identified prefix(es). +#' +#' @examples +#' # Assuming 'obj' is your Seurat object +#' # prefix <- find_prefix_in_cell_IDs(obj) +#' +#' @export +find_prefix_in_cell_IDs <- function(obj, cell_ID_pattern = "[ATCG]{16}.*$") { + stopifnot(inherits(obj, "Seurat")) + + # Extract cell IDs + cell_IDs <- colnames(obj) + + # Remove the standard 16-character cell-IDs + potential_prefixes <- gsub(pattern = cell_ID_pattern, replacement = "", x = cell_IDs) + + # Check if there is no prefix + if (all(potential_prefixes == "")) { + print("No prefix found in cell IDs.") + return(NULL) + } + + # Identify unique prefixes + unique_prefixes <- unique(potential_prefixes) + + # Print the number of unique prefixes + print(paste(length(unique_prefixes), "unique prefix(es) found:", head(unique_prefixes))) + + # Issue a warning if more than one unique prefix is found + if (length(unique_prefixes) > 1) { + warning("Multiple unique prefixes identified in cell IDs:", head(unique_prefixes), immediate. = TRUE) + } + + # Return the identified prefix(es) + return(unique_prefixes) +} + + + + +# _________________________________________________________________________________________________ +#' @title Create Cluster Labels for Each Cell +#' +#' @description Generates labels for each cell by combining gene names and cluster IDs. This function +#' takes a named vector, typically representing top genes for clusters (values) and their corresponding +#' cluster IDs (names), along with a vector of cell IDs. It then creates a new vector where each cell +#' is labeled with its top gene and cluster ID in the format "GeneName.ClusterID". +#' +#' @param TopGenes A named vector with gene names as values and cluster IDs as names, +#' representing the top or defining gene for each cluster. +#' @param clID.per.cell A vector of cluster IDs for each cell, used to match each cell with its +#' corresponding top gene from `TopGenes`. +#' +#' @return A vector where each element corresponds to a cell labeled with both its defining gene +#' name and cluster ID, in the format "GeneName.ClusterID". +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Assuming `TopGenes.Classic` is a named vector of top genes and cluster IDs, +#' # and `metaD.CL.colname` is a column in metadata with cluster IDs per cell +#' cellLabels <- seu.Make.Cl.Label.per.cell( +#' TopGenes = TopGenes.Classic, +#' clID.per.cell = getMetadataColumn(ColName.metadata = metaD.CL.colname) +#' ) +#' # `cellLabels` now contains labels for each cell in the format "GeneName.ClusterID" +#' } +#' } +#' +#' @export +seu.Make.Cl.Label.per.cell <- function(TopGenes, clID.per.cell) { + Cl.names_class <- TopGenes[clID.per.cell] + Cl.names_wNr <- paste0(Cl.names_class, " (", names(Cl.names_class), ")") + return(Cl.names_wNr) +} + + +# _________________________________________________________________________________________________ +#' @title Retrieve the Top Variable Genes from a Seurat Object +#' +#' @description Retrieves the names of the most variable genes from a Seurat object, +#' typically used to focus subsequent analyses on genes with the greatest variation across cells. +#' +#' @param obj A Seurat object containing gene expression data and, +#' pre-computed highly variable gene information. +#' @param nGenes The number of most variable genes to retrieve; Default: `p$nVarGenes`. +#' +#' @return A vector containing the names of the most variable genes. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Assuming `combined.obj` is a Seurat object with computed variable genes +#' varGenes <- GetMostVarGenes(obj = combined.obj, nGenes = 100) +#' } +#' } +#' +#' @export +#' @importFrom Seurat FindVariableFeatures +#' +GetMostVarGenes <- function(obj, nGenes = p$nVarGenes) { + head(rownames(slot(object = obj, name = "hvg.info")), n = nGenes) +} + +# _________________________________________________________________________________________________ +#' @title Check Gene Names in Seurat Object +#' +#' @description Examines gene names in a Seurat object for specific naming conventions, +#' such as the presence of hyphens (-) or dots (.) often found in mitochondrial gene names. +#' This function is useful for ensuring gene names conform to expected patterns, +#' especially when preparing data for compatibility with other tools or databases. +#' +#' @param Seu.obj A Seurat object containing gene expression data. +#' +#' @details This function prints out examples of gene names that contain specific characters +#' of interest (e.g., '-', '_', '.', '.AS[1-9]'). It is primarily used for data inspection +#' and cleaning before further analysis or data export. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Assuming `combined.obj` is your Seurat object +#' gene.name.check(Seu.obj = combined.obj) +#' # This will print examples of gene names containing '-', '_', '.', and '.AS[1-9]' +#' } +#' } +#' +#' @seealso \code{\link[Seurat]{GetAssayData}} +#' +#' @importFrom Seurat GetAssayData +#' @importFrom CodeAndRoll2 grepv +#' @importFrom MarkdownHelpers llprint llogit +#' +#' @export +gene.name.check <- function(Seu.obj) { + rn <- rownames(GetAssayData(object = Seu.obj, slot = "counts")) + MarkdownHelpers::llprint("### Gene name pattern") + + MarkdownHelpers::llogit('`rn = rownames(GetAssayData(object = ls.Seurat[[1]], slot = "counts"))`') + MarkdownHelpers::llogit('`head(CodeAndRoll2::grepv(rn, pattern = "-"), 10)`') + print("pattern = -") + MarkdownHelpers::llprint(head(CodeAndRoll2::grepv(rn, pattern = "-"), 10)) + + MarkdownHelpers::llogit('`head(CodeAndRoll2::grepv(rn, pattern = "_"), 10)`') + print("pattern = _") + MarkdownHelpers::llprint(head(CodeAndRoll2::grepv(rn, pattern = "_"), 10)) + + MarkdownHelpers::llogit('`head(CodeAndRoll2::grepv(rn, pattern = "\\."), 10)`') + print("pattern = \\.") + MarkdownHelpers::llprint(head(CodeAndRoll2::grepv(rn, pattern = "\\."), 10)) + + MarkdownHelpers::llogit('`head(CodeAndRoll2::grepv(rn, pattern = "\\.AS[1-9]"), 10)`') + print("pattern = \\.AS[1-9]") + MarkdownHelpers::llprint(head(CodeAndRoll2::grepv(rn, pattern = "\\.AS[1-9]"), 10)) +} + + +# _________________________________________________________________________________________________ +#' @title Check if Gene Names exist in Seurat Object or HGNC Database +#' +#' @description Verifies the presence of specified gene names within a Seurat object or +#' queries them against the HGNC database. This function is useful for ensuring gene names are +#' correctly formatted and exist within the dataset or are recognized gene symbols. +#' +#' @param list.of.genes A vector of gene names to be checked; Default: `ClassicMarkers`. +#' @param makeuppercase If `TRUE`, converts all gene names to uppercase before checking; Default: `FALSE`. +#' @param verbose If `TRUE`, prints information about any missing genes; Default: `TRUE`. +#' @param HGNC.lookup If `TRUE`, attempts to look up any missing genes in the HGNC database to +#' verify their existence; Default: `FALSE`. +#' @param obj The Seurat object against which the gene names will be checked; Default: `combined.obj`. +#' @param assay.slot Assay slot of the Seurat object to check for gene names; Default: `'RNA'`. +#' @param data.slot Data slot of the assay to check for gene names; Default: `'data'`. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Check for the presence of a gene name in uppercase +#' check.genes(list.of.genes = "top2a", makeuppercase = TRUE, obj = combined.obj) +#' +#' # Check for a gene name with verbose output and HGNC lookup +#' check.genes(list.of.genes = "VGLUT2", verbose = TRUE, HGNC.lookup = TRUE, obj = combined.obj) +#' } +#' } +#' +#' @seealso \code{\link[Seurat]{GetAssayData}}, \code{\link[DatabaseLinke.R]{qHGNC}} +#' +#' @export +#' @importFrom DatabaseLinke.R qHGNC +#' @importFrom Seurat GetAssayData +#' @importFrom Stringendo percentage_formatter +#' +check.genes <- function( + list.of.genes = ClassicMarkers, makeuppercase = FALSE, HGNC.lookup = FALSE, + obj = combined.obj, + assay.slot = c("RNA", "integrated")[1], + data.slot = c("counts", "data")[2], + verbose = TRUE, + ...) { + tictoc::tic("check.genes") + message(" > Running check.genes...") + message("assay: ", assay.slot, ", data.slot: ", data.slot) + + if (makeuppercase) list.of.genes <- toupper(list.of.genes) + all_genes <- rownames(GetAssayData(object = obj, assay = assay.slot, slot = data.slot)) + + missingGenes <- setdiff(list.of.genes, all_genes) + if (length(missingGenes) > 0) { + if (verbose) { + message( "\n", length(missingGenes), " or ", + Stringendo::percentage_formatter(length(missingGenes) / length(list.of.genes)), + " genes not found in the data, e.g: ", kppc(head(missingGenes, n = 10)) + ) + } + if (HGNC.lookup) { + if (exists("qHGNC", mode = "function")) { + try(DatabaseLinke.R::qHGNC(missingGenes, Open = F)) + } else { + warning("DatabaseLinke.R's qHGNC() function is needed, please install from github.", immediate. = TRUE) + } + } + } + tictoc::toc() + intersect(list.of.genes, all_genes) +} + + + +# _________________________________________________________________________________________________ +#' @title Fix Zero Indexing in Seurat Clustering +#' +#' @description Adjusts Seurat object metadata to fix zero-based cluster indexing, converting it to one-based indexing. +#' This function modifies a specified metadata column in the Seurat object to replace zero-indexed cluster names with one-based indexing. +#' +#' @param ColName.metadata The name of the metadata column containing zero-based cluster indices; Default: `'res.0.6'`. +#' @param obj The Seurat object to be modified; Default: `org`. +#' +#' @return The Seurat object with the specified metadata column's cluster indices adjusted to one-based indexing. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Assuming `org` is a Seurat object with zero-based cluster indexing +#' org <- fixZeroIndexing.seurat(ColName.metadata = "res.0.6", obj = org) +#' # Now, `org` has its cluster indices in the 'res.0.6' metadata column adjusted to one-based indexing +#' } +#' } +#' +#' @export +fixZeroIndexing.seurat <- function(ColName.metadata = "res.0.6", obj = org) { + obj@meta.data[, ColName.metadata] <- as.numeric(obj@meta.data[, ColName.metadata]) + 1 + print(obj@meta.data[, ColName.metadata]) + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title Calculate Fraction of Genes in Transcriptome +#' +#' @description Calculates the fraction of specified genes within the entire transcriptome of +#' each cell in a Seurat object. +#' This function is useful for assessing the relative abundance of a set of genes across cells, +#' such as identifying cells with high expression of marker genes. +#' +#' @param geneset A character vector of gene symbols for which the fraction in the transcriptome will be calculated. +#' Default: `c("MALAT1")`. The function will check for the existence of these genes in the Seurat object. +#' @param obj A Seurat object containing gene expression data; Default: `combined.obj`. +#' The function extracts gene expression data from this object to calculate fractions. +#' @param data.slot The data slot from which to extract expression data. This can be `"counts"` +#' for raw counts or `"data"` for normalized data; Default: second element (`"data"`). +#' +#' @return A numeric vector where each element represents the fraction of the specified geneset's expression +#' relative to the total transcriptome of a cell, expressed as a percentage. The names of the vector correspond to cell IDs. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Assuming `combined.obj` is your Seurat object +#' fractionInTranscriptome <- CalculateFractionInTranscriptome(geneset = c("MALAT1", "GAPDH"), obj = combined.obj) +#' # This will return the fraction of MALAT1 and GAPDH in the transcriptome of each cell +#' } +#' } +#' +#' @note This function calls `check.genes` to verify the existence of the specified genes within the Seurat object. +#' If genes are not found, it will return a warning. +#' +#' @seealso \code{\link[Seurat]{GetAssayData}} for retrieving expression data from a Seurat object. +#' +#' @export +#' +CalculateFractionInTrome <- function( + genesCalc.Cor.Seuratet = c("MALAT1"), + obj = combined.obj, + data.slot = c("counts", "data")[2]) { + warning(" >>>> Use addMetaFraction() <<<<", immediate. = TRUE) + geneset <- check.genes(list.of.genes = geneset) + stopifnot(length(geneset) > 0) + + mat <- as.matrix(slot(obj@assays$RNA, name = data.slot)) + mat.sub <- mat[geneset, , drop = FALSE] + RC.per.cell.geneset <- colSums(mat.sub) + + RC.per.cell <- colSums(mat) + gene.fraction.per.cell <- 100 * RC.per.cell.geneset / RC.per.cell + return(gene.fraction.per.cell) +} + +# _________________________________________________________________________________________________ +#' @title AddNewAnnotation +#' +#' @description This function creates a new metadata column based on an existing metadata column +#' and a list of mappings (name <- IDs). +#' @param obj A Seurat object for which the new annotation is to be created. Default: 'obj'. +#' @param source A character string specifying the existing metadata column to be used as the +#' basis for the new annotation. Default: 'RNA_snn_res.0.5'. +#' @param named.list.of.identities A named list providing the mappings for the new annotation. +#' Default: 'ls.Subset.ClusterLists'. +#' @return A character vector representing the new metadata column. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' ls.Subset.ClusterLists <- list("hESC.h9" = c("4", "10", "14"), "hESC.176" = c("0", "1", "2")) +#' AddNewAnnotation() +#' } +#' } +#' @export +AddNewAnnotation <- function( + obj = obj, + source = "RNA_snn_res.0.5", named.list.of.identities = ls.Subset.ClusterLists) { + NewID <- df.col.2.named.vector(obj[[source]]) + + for (i in 1:length(named.list.of.identities)) { + lx <- as.character(named.list.of.identities[[i]]) + name.lx <- names(named.list.of.identities)[i] + NewID <- CodeAndRoll2::translate(vec = NewID, old = lx, new = name.lx) + } + print(table(NewID)) + return(NewID) +} + + +# _________________________________________________________________________________________________ +#' @title whitelist.subset.ls.Seurat +#' +#' @description Subsets cells in a list of Seurat objects based on an externally provided list of cell IDs. +#' @param ls.obj A list of Seurat objects. Default: ls.Seurat. +#' @param metadir Directory for the metadata. Default: p$cellWhiteList. +#' @param whitelist.file Filename of the whitelist containing cell IDs. Default: "NonStressedCellIDs.2020.10.21_18h.tsv". +#' @return A list of Seurat objects containing only the cells specified in the whitelist. +#' @details The function first validates the presence of all identities from the metadata in the +#' Seurat objects. If all identities are present, the function subsets each Seurat object based on +#' the whitelist of cell IDs. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' ls.Seurat.subset <- whitelist.subset.ls.Seurat( +#' ls.obj = ls.Seurat, metadir = p$"cellWhiteList", +#' whitelist.file = "NonStressedCellIDs.2020.10.21_18h.tsv" +#' ) +#' } +#' } +#' @seealso +#' \code{\link[Seurat]{subset}} +#' @importFrom ReadWriter read.simple.tsv +#' +#' @export +whitelist.subset.ls.Seurat <- function( + ls.obj = ls.Seurat, + metadir = p$"cellWhiteList" # '~/Dropbox/Abel.IMBA/MetadataD/POL.meta/cell.lists/' + , whitelist.file = "NonStressedCellIDs.2020.10.21_18h.tsv") { + cells.before <- sapply(ls.obj, ncol) + # Find file + df.cell.whitelist <- ReadWriter::read.simple.tsv(metadir, whitelist.file) + dsets <- table(df.cell.whitelist[, 1]) + + ls.orig.idents <- lapply(lapply(ls.Seurat, getMetadataColumn, ColName.metadata = "orig.ident"), unique) + stopif(any(sapply(ls.orig.idents, l) == length(ls.Seurat)), message = "Some ls.Seurat objects have 1+ orig identity.") + + dsets.in.lsSeu <- unlist(ls.orig.idents) + isMathced <- all(dsets.in.lsSeu == names(dsets)) # Stop if either ls.Seurat OR the metadata has identities not found in the other, in the same order. + stopif(!isMathced, message = paste( + "either ls.Seurat OR the metadata has identities not found in the other, or they are not in same order.", + kpps(dsets.in.lsSeu), "vs.", kpps(names(dsets)) + )) + + # identX <- ls.orig.idents[[1]] + for (i in 1:length(ls.orig.idents)) { + identX <- ls.orig.idents[[i]] + print(identX) + + # Extract and process cellIDs + idx.match <- which(df.cell.whitelist[, 1] == identX) + cell.whitelist <- rownames(df.cell.whitelist)[idx.match] + cell.whitelist <- substr( + x = cell.whitelist, + start = 1, stop = nchar(cell.whitelist) - 2 + ) + + # Extract and process cellIDs + ls.obj[[i]] <- subset(x = ls.obj[[i]], cells = cell.whitelist) + } + cells.after <- sapply(ls.obj, ncol) + iprint("cells.before", cells.before, "cells.after", cells.after) + return(ls.obj) +} + +# _________________________________________________________________________________________________ +#' @title FindCorrelatedGenes +#' +#' @description Find correlated genes in a Seurat object +#' @param gene Gene of interest. Default: 'TOP2A' +#' @param obj Seurat object to find the correlated genes from. Default: `combined.obj` +#' @param assay Assay to be used from the Seurat object. Default: 'RNA' +#' @param slot Slot to be used from the specified assay in the Seurat object. Default: 'data' +#' @param HEonly Logical, if TRUE, filters matrix to high-expressing genes only. Default: `FALSE`. +#' @param minExpr Minimum expression level for a gene to be considered. Default: 1 +#' @param minCells Minimum number of cells expressing a gene for the gene to be considered. Default: 1000 +#' @param trailingNgenes Number of top genes to consider based on their correlation. Default: 1000 +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' FindCorrelatedGenes(gene = "TOP2A", obj = combined.obj) +#' write_clip(names(head(topGenes[-(1:6)], n = 50))) +#' } +#' } +#' @seealso +#' \code{\link[matrixStats]{rowSums2}} +#' @importFrom matrixStats rowSums2 +#' @importFrom tictoc tic toc +#' @importFrom MarkdownReports wbarplot +#' +#' @export +FindCorrelatedGenes <- function( + gene = "TOP2A", obj = combined.obj, assay = "RNA", slot = "data", + HEonly = FALSE, minExpr = 1, minCells = 1000, + trailingNgenes = 1000 + ) { + + tictoc::tic("FindCorrelatedGenes") + AssayData <- GetAssayData(object = obj, assay = assay, slot = slot) + matrix_mod <- iround(as.matrix(AssayData)) + if (HEonly) { + idx.pass <- (matrixStats::rowSums2(matrix_mod > minExpr) > minCells) + pc_TRUE(idx.pass) + matrix_mod <- matrix_mod[which(idx.pass), ] + } + geneExpr <- as.numeric(matrix_mod[gene, ]) + correlations <- apply(matrix_mod, 1, cor, geneExpr) + topGenes <- trail(sort(correlations, decreasing = TRUE), N = trailingNgenes) + tictoc::toc() + MarkdownReports::wbarplot(head(topGenes, n = 25)) + topGenes +} + + + +# _________________________________________________________________________________________________ +# _________________________________________________________________________________________________ + + + +# _________________________________________________________________________________________________ +# Seurat.update.gene.symbols.HGNC.R ______________________________ ---- +# _________________________________________________________________________________________________ +# source('~/GitHub/Packages/Seurat.utils/Functions/Seurat.update.gene.symbols.HGNC.R') +# try (source("https://raw.githubusercontent.com/vertesy/Seurat.utils/master/Functions/Seurat.update.gene.symbols.HGNC.R")) +# require(HGNChelper) + + + +#' @title Update Gene Symbols in a Seurat Object +#' +#' @description This function updates gene symbols in a Seurat object based on current gene +#' nomenclature guidelines, using HGNChelper(). It checks and updates gene symbols to their +#' latest approved versions,ensuring that gene annotations are current and consistent. +#' The function optionally enforces unique gene symbols and provides statistics on the update process. +#' +#' @param obj A Seurat object containing gene expression data; Default: `ls.Seurat[[i]]` +#' (ensure to replace `i` with the actual index or variable referencing your Seurat object). +#' @param species_ The species for which the gene symbols are checked and updated, +#' used to ensure the correct gene nomenclature is applied; Default: `'human'`. +#' Supports `'human'`, `'mouse'`, etc., as specified in the `HGNChelper` package. +#' @param EnforceUnique Logical flag indicating whether to enforce unique gene symbols +#' within the Seurat object. When set to `TRUE`, it resolves issues with duplicated gene symbols +#' by appending unique identifiers; Default: `TRUE`. +#' @param ShowStats Logical flag indicating whether to display statistics about the gene +#' symbol update process. When set to `TRUE`, it prints detailed information on the console +#' about the changes made; Default: `FALSE`. +#' +#' @return A modified Seurat object with updated gene symbols. The function directly modifies +#' the input Seurat object, ensuring that gene symbols adhere to the latest nomenclature. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Assuming `mySeuratObject` is your Seurat object +#' updatedSeuratObject <- UpdateGenesSeurat( +#' obj = mySeuratObject, species_ = "human", +#' EnforceUnique = TRUE, ShowStats = TRUE +#' ) +#' # `updatedSeuratObject` now has updated gene symbols +#' } +#' } +#' +#' @seealso +#' \code{\link[HGNChelper]{checkGeneSymbols}} for details on checking and updating gene symbols. +#' +#' @export +#' @importFrom HGNChelper checkGeneSymbols +#' +UpdateGenesSeurat <- function(obj = ls.Seurat[[i]], species_ = "human", assay = "RNA", + EnforceUnique = TRUE, ShowStats = FALSE) { + assays.present <- Assays(obj) + for (assay in assays.present) { + + message("Renaming in assay: ", assay, "...") + + all.genes <- Features(obj, assay = assay) + HGNC.updated <- HGNChelper::checkGeneSymbols(all.genes, unmapped.as.na = FALSE, map = NULL, species = species_) + if (EnforceUnique) HGNC.updated <- HGNC.EnforceUnique(HGNC.updated) + if (ShowStats) { + print(HGNC.updated) + print(GetUpdateStats(HGNC.updated)) + } + + obj <- RenameGenesSeurat(obj, newnames = HGNC.updated$"Suggested.Symbol", assay = assay) + + } + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title Rename Gene Symbols in a Seurat Object +#' +#' @description This function replaces gene names across various slots within a specified assay +#' of a Seurat object. It is designed to be run prior to any data integration or downstream analysis +#' processes. The function targets the `@counts`, `@data`, and `@meta.features` slots within +#' the specified assay, ensuring consistency in gene nomenclature across the object. +#' +#' @param obj A Seurat object containing the assay and slots to be updated; Default: `ls.Seurat[[i]]` +#' (replace `i` with the appropriate index). +#' @param newnames A character vector containing the new gene names intended to replace the +#' existing ones; Default: `HGNC.updated[[i]]$Suggested.Symbol`. Ensure this matches the order +#' and length of the genes in the specified assay. +#' @param assay The name of the assay within the Seurat object where gene names will be updated; +#' Default: `"RNA"`. This function assumes simple objects containing only an RNA assay. +#' @param slots A character vector specifying which slots within the assay to update. Possible +#' values include `"data"`, `"counts"`, and `"meta.features"`; other layers can be specified if present. +#' +#' @details It is crucial to run this function before any data integration or further analysis +#' to ensure gene symbol consistency. The function does not support complex objects with multiple +#' assays where dependencies between assays might lead to inconsistencies. Use with caution and +#' verify the results. +#' +#' @note This function modifies the Seurat object in place, changing gene symbols directly within +#' the specified slots. Be sure to have a backup of your Seurat object if needed before applying +#' this function. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Assuming `SeuratObj` is your Seurat object +#' # and `HGNC.updated.genes` contains the updated gene symbols +#' SeuratObj <- RenameGenesSeurat( +#' obj = SeuratObj, +#' newnames = HGNC.updated.genes$Suggested.Symbol +#' ) +#' # `SeuratObj` now has updated gene symbols in the specified assay and slots +#' } +#' } +#' +#' @export +RenameGenesSeurat <- function(obj = ls.Seurat[[i]], + newnames = HGNC.updated[[i]]$Suggested.Symbol, + assay = "RNA", + slots = c("data", "counts", "meta.features")) { + # + message("RenameGenesSeurat, assay: ", assay) + warning("Run this before integration and downstream processing. It only attempts to change + @counts, @data, and @meta.features in obj@assays$YOUR_ASSAY.", immediate. = TRUE) + + stopifnot( + "Unequal gene name sets: nrow(assayobj) != nrow(newnames):" = + length(Features(obj, assay = assay)) == length(newnames) + ) + + if (obj@version < "5") warning("obj@version < 5. Old versions are not supported. Update the obj!", immediate. = TRUE) + + if ("scale.data" %in% slots) { + n_genes_sc_dta <- nrow(obj@assays[[assay]]$"scale.data") + stopifnot( + "scale.data does has different number of genes than newnames!" = + n_genes_sc_dta == length(newnames) + ) + } + + LayersFound <- SeuratObject::Layers(obj@assays[[assay]]) + iprint("Present: ", sort(LayersFound)) + + slots <- sort(intersect(slots, LayersFound)) + iprint("Replaced: ", slots) + + for (slotX in slots) { + print(slotX) + if (slotX == "scale.data") browser() + nrO <- nrow(SeuratObject::GetAssayData(object = obj, assay = assay, layer = slotX)) + obj <- .check_and_rename(obj, assay, newnames = newnames, layer.name = slotX) + nrN <- nrow(SeuratObject::GetAssayData(object = obj, assay = assay, layer = slotX)) + stopifnot(nrN == nrO) + } + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title Check and Rename Gene Names in Seurat Assay Object +#' +#' @description This function renames rows (genes) in a specified slot of a Seurat assay object. +#' It supports slots storing data as either a dense or a sparse matrix (dgCMatrix) or data.frame. +#' +#' @param obj A Seurat object. +#' @param assay An Assay name in a Seurat object. +#' @param newnames A character vector of new gene names to be assigned. +#' @param layer.name A string specifying the slot in the Assay object to be updated. +#' Valid options typically include 'counts', 'data', or 'scale.data'. +#' +#' @return An Assay object with updated gene names in the specified slot. +#' @examples +#' \dontrun{ +#' # Assuming 'seurat_obj' is a Seurat object and 'new_gene_names' is a vector of gene names +#' updated_assay <- check_and_rename( +#' assayobj = seurat_obj[["RNA"]], +#' newnames = new_gene_names, +#' layer.name = "counts" +#' ) +#' } +.check_and_rename <- function(obj, assay, newnames, layer.name) { + cat(layer.name, fill = TRUE) + + length_newnames <- length(newnames) + length_orig_names <- length(Features(obj, assay = assay)) + + stopifnot( + is(obj, "Seurat"), + is.character(assay), + is.character(layer.name), + is.character(newnames), + length_orig_names == length_newnames + ) + + assayobj <- obj@assays[[assay]] + feature.list <- rownames(assayobj@features@.Data) + + if (length(feature.list) == length(newnames)) { + rownames(assayobj@features@.Data) <- newnames + nrX <- length(rownames(assayobj@features@.Data)) + } else { + iprint("length feature.list", length(feature.list), "length newnames", length(newnames)) + stop() + } + + if (layer.name %in% SeuratObject::Layers(assayobj)) { + matrix_n <- SeuratObject::LayerData(assayobj, layer = layer.name) + nr1 <- nrow(matrix_n) + + if (all(dim(matrix_n)) > 0) { + # browser() + stopifnot(nrow(matrix_n) == length(newnames)) + + if ("dgCMatrix" %in% class(matrix_n)) { + message(assay, "@", layer.name, " is of type dgeCMatrix!") + matrix_n@Dimnames[[1]] <- newnames + } else if ("matrix" %in% class(matrix_n)) { + message(assay, "@", layer.name, " is of type Matrix!") + rownames(matrix_n) <- newnames + } else if ("data.frame" %in% class(matrix_n)) { + message(assay, "@", layer.name, " is of type data.frame!") + rownames(matrix_n) <- newnames + } else { + warning(">>> No renaming: ", assay, "@", layer.name, + " not of type dgeCMatrix / Matrix / data.frame.", + immediate. = TRUE + ) + } + stopifnot(nr1 == nrow(matrix_n)) + + SeuratObject::LayerData(assayobj, layer = layer.name) <- matrix_n + nr3 <- nrow(SeuratObject::LayerData(assayobj, layer = layer.name)) + stopifnot(nr3 == nrX) + } + } else { + warning(paste(">>>", assay, "@", layer.name, "does not exist!"), immediate. = TRUE) + } + # obj <- SetAssayData(obj, layer = layer.name, new.data = matrix_n) + obj@assays[[assay]] <- assayobj + return(obj) +} + +# _________________________________________________________________________________________________ +#' @title Remove Specific Genes from a Seurat Object +#' +#' @description Removes specified genes from the metadata, counts, data, and scale.data slots of a Seurat object. +#' This operation is typically performed prior to data integration to ensure that gene sets are consistent +#' across multiple datasets. The function modifies the Seurat object in place. +#' +#' @param obj A Seurat object; Default: `ls.Seurat[[i]]` (please ensure to replace `i` with the actual index or variable). +#' @param symbols2remove A character vector specifying the genes to be removed from the Seurat object; +#' Default: `c("TOP2A")`. +#' +#' @details This function directly modifies the `@counts`, `@data`, and `@scale.data` slots within +#' the RNA assay of the provided Seurat object, as well as the `@meta.data` slot. It's important to run +#' this function as one of the initial steps after creating the Seurat object and before proceeding +#' with downstream analyses or integration processes. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Assuming `SeuratObj` is your Seurat object and you want to remove the gene "TOP2A" +#' updatedSeuratObj <- RemoveGenesSeurat(obj = SeuratObj, symbols2remove = "TOP2A") +#' # Now `updatedSeuratObj` does not contain "TOP2A" in the specified slots +#' } +#' } +#' +#' @return A Seurat object with the specified genes removed from the mentioned slots. +#' +#' @export +RemoveGenesSeurat <- function(obj = ls.Seurat[[i]], symbols2remove = c("TOP2A")) { + print("Run this as the first thing after creating the Seurat object. + It only removes genes from: metadata; obj@assays$RNA@counts, @data and @scale.data.") + RNA <- obj@assays$RNA + + if (length(RNA@counts)) { + NotFound <- setdiff(symbols2remove, RNA@counts@Dimnames[[1]]) + if (length(NotFound) == 0) { + RNA@counts@Dimnames[[1]] <- symbols2remove + print("Genes removed from RNA@counts") + } else { + print("Not All Genes Found in RNA@counts. Missing:") + print(NotFound) + } + } + if (length(RNA@data)) { + if (length(setdiff(symbols2remove, RNA@data@Dimnames[[1]])) == 0) { + RNA@data@Dimnames[[1]] <- symbols2remove + print("Genes removed from RNA@data.") + } else { + print("Not All Genes Found in RNA@data") + } + } + if (length(RNA@scale.data)) { + if (length(setdiff(symbols2remove, RNA@scale.data@Dimnames[[1]])) == 0) { + RNA@scale.data@Dimnames[[1]] <- symbols2remove + print("Genes removed from RNA@scale.data.") + } else { + print("Not All Genes Found in RNA@scale.data") + } + } + if (length(obj@meta.data)) { + if (length(setdiff(symbols2remove, rownames(obj@meta.data))) == 0) { + rownames(obj@meta.data) <- symbols2remove + print("Genes removed from @meta.data.") + } else { + print("Not All Genes Found in @metadata") + } + } + obj@assays$RNA <- RNA + return(obj) +} + + + +# _________________________________________________________________________________________________ +#' @title Enforce Unique HGNC Gene Symbols +#' +#' @description Ensures that gene symbols are unique after being updated with HGNC symbols. This function +#' applies a suffix to duplicate gene symbols to enforce uniqueness. While using `make.unique` might not +#' be the ideal solution due to potential mismatches, it significantly reduces the number of mismatching +#' genes in certain scenarios, making it a practical approach for data integration tasks. +#' +#' @param updatedSymbols A data frame or matrix containing gene symbols updated via `HGNChelper::checkGeneSymbols()`. +#' The third column should contain the updated gene symbols that are to be made unique. +#' +#' @return A modified version of the input data frame or matrix with unique gene symbols in the third column. +#' If duplicates were found, they are made unique by appending `.1`, `.2`, etc., to the repeated symbols. +#' +#' @details The function specifically targets the issue of duplicate gene symbols which can occur after +#' updating gene symbols to their latest HGNC-approved versions. Duplicate symbols can introduce +#' ambiguity in gene expression datasets, affecting downstream analyses like differential expression or +#' data integration. By ensuring each gene symbol is unique, this function helps maintain the integrity +#' of the dataset. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Assuming `SymUpd` is your data frame of updated symbols from HGNChelper::checkGeneSymbols() +#' uniqueSymbols <- HGNC.EnforceUnique(updatedSymbols = SymUpd) +#' # `uniqueSymbols` now contains unique gene symbols in its third column +#' } +#' } +#' +#' @note This function is a workaround for ensuring unique gene symbols and might not be suitable +#' for all datasets or analyses. It's important to review the results and ensure that the gene +#' symbols accurately represent your data. +#' +#' @export +HGNC.EnforceUnique <- function(updatedSymbols) { + NGL <- updatedSymbols[, 3] + if (any.duplicated(NGL)) { + updatedSymbols[, 3] <- make.unique(NGL) + "Unique names are enforced by suffixing .1, .2, etc." + } + return(updatedSymbols) +} + + + + +# _________________________________________________________________________________________________ +#' @title Gene Symbol Update Statistics +#' +#' @description Generates statistics on the gene symbol updates performed by `UpdateGenesSeurat()`. +#' This function analyzes the data frame of gene symbols before and after the update process, +#' providing insights into the proportion and total number of genes that were updated. +#' +#' @param genes A data frame of gene symbols before and after update, typically the output of +#' `UpdateGenesSeurat()`. Default: `HGNC.updated[[i]]` (where `i` is the index of the desired +#' Seurat object in a list). +#' +#' @return A named vector with statistics on gene updates, including the percentage of updated genes, +#' the absolute number of updated genes, and the total number of genes processed. +#' +#' @details The function examines the `Approved` column of the input data frame to identify +#' gene symbols marked for update and compares the original and suggested symbols to determine +#' actual updates. The statistics highlight the efficiency and impact of the gene symbol +#' updating process, aiding in the assessment of data preprocessing steps. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Assuming `HGNC.updated.genes` is your data frame containing the original and +#' # suggested gene symbols, as returned by `UpdateGenesSeurat()` +#' updateStats <- GetUpdateStats(genes = HGNC.updated.genes) +#' # `updateStats` now contains the update statistics, including percentage and count of updated genes +#' } +#' } +#' +#' @note The function requires the input data frame to have specific columns as produced by +#' `HGNChelper::checkGeneSymbols()` and subsequently processed by `UpdateGenesSeurat()`. +#' Ensure that the input adheres to this format for accurate statistics. +#' +#' @seealso \code{\link{UpdateGenesSeurat}}, for the function that updates gene symbols and produces +#' the input data frame for this function. +#' +#' @importFrom Stringendo percentage_formatter +#' +#' @export +GetUpdateStats <- function(genes = HGNC.updated[[i]]) { + MarkedAsUpdated <- genes[genes$Approved == FALSE, ] + AcutallyUpdated <- sum(MarkedAsUpdated[, 1] != MarkedAsUpdated[, 3]) + UpdateStats <- c( + "Updated (%)" = Stringendo::percentage_formatter(AcutallyUpdated / nrow(genes)), + "Updated Genes" = floor(AcutallyUpdated), "Total Genes" = floor(nrow(genes)) + ) + return(UpdateStats) +} + + +# _________________________________________________________________________________________________ +#' @title PlotUpdateStats +#' +#' @description Creates a scatter plot of update statistics. +#' @param mat A matrix containing update statistics. Default: UpdateStatMat. +#' @param column.names A character vector of column names in the mat parameter. Default: c("Updated (%)", "Updated (Nr.)"). +#' @return A scatter plot displaying update statistics. +#' @details This function takes a matrix containing update statistics and column names to plot +#' the corresponding statistics. It colorizes the genes and plots the percentage of total genes +#' updated against the number of genes updated. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' PlotUpdateStats(mat = result.of.GetUpdateStats) +#' } +#' } +#' @seealso +#' \code{\link[wplot]{wplot}}, \code{\link[wcolorize]{wcolorize}} +#' @importFrom MarkdownReports wplot wlegend +#' +#' @export +PlotUpdateStats <- function(mat = UpdateStatMat, column.names = c("Updated (%)", "Updated (Nr.)")) { # Scatter plot of update stats. + stopifnot(column.names %in% colnames(UpdateStatMat)) + HGNC.UpdateStatistics <- mat[, column.names] + HGNC.UpdateStatistics[, "Updated (%)"] <- 100 * HGNC.UpdateStatistics[, "Updated (%)"] + colnames(HGNC.UpdateStatistics) <- c("Gene Symbols updated (% of Total Genes)", "Number of Gene Symbols updated") + lll <- wcolorize(vector = rownames(HGNC.UpdateStatistics)) + MarkdownReports::wplot(HGNC.UpdateStatistics, + col = lll, + xlim = c(0, max(HGNC.UpdateStatistics[, 1])), + ylim = c(0, max(HGNC.UpdateStatistics[, 2])) + ) + MarkdownReports::wlegend(NamedColorVec = lll, poz = 1) +} + + + +# _________________________________________________________________________________________________ +# Handling SNP demux table results coming from SoupOrCell ______________________________ ---- +# _________________________________________________________________________________________________ + + + + + + +# _________________________________________________________________________________________________ + + +# _________________________________________________________________________________________________ +# Read.Write.Save.Load.functions.R ______________________________ ---- +# _________________________________________________________________________________________________ +# source('~/GitHub/Packages/Seurat.utils/Functions/Read.Write.Save.Load.functions.R') +# try (source("https://raw.githubusercontent.com/vertesy/Seurat.utils/master/Functions/Read.Write.Save.Load.functions.R")) + +"Multicore read / write (I/O) functions are https://github.com/vertesy/Seurat.multicore" +"Single core read / write (I/O) functions are in https://github.com/vertesy/Seurat.utils/" + + +# _________________________________________________________________________________________________ +#' @title Convert10Xfolders +#' +#' @description This function takes a parent directory with a number of subfolders, each +#' containing the standard output of 10X Cell Ranger. It (1) loads the (filtered) data matrices, +#' (2) converts them to Seurat objects, and (3) saves them as .qs files +#' +#' @param InputDir A character string specifying the input directory. +#' @param regex A logical value. If TRUE, the folderPattern is treated as a regular expression. Default: `FALSE`.. +#' @param folderPattern A character vector specifying the pattern of folder names to be searched. Default: 'filtered_feature'. +#' @param suffix A character string specifying the suffix of the files saved. +#' @param min.cells An integer value specifying the minimum number of cells. Default: 5. +#' @param min.features An integer value specifying the minimum number of features. Default: 200. +#' @param updateHGNC A logical value indicating whether to update the HGNC. Default: `TRUE`.. +#' @param save Save .qs object? Default: `TRUE`.. +#' @param ShowStats A logical value indicating whether to show statistics. Default: `TRUE`.. +#' @param writeCBCtable A logical value indicating whether to write out a list of cell barcodes (CBC) as a tsv file. Default: `TRUE`.. +#' @param depth An integer value specifying the depth of scan (i.e., how many levels below the InputDir). Default: 2. +#' @param sort_alphanumeric sort files alphanumeric? Default: `TRUE`.. +#' @param save_empty_droplets save empty droplets? Default: `TRUE`.. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) Convert10Xfolders(InputDir) +#' } +#' @export +Convert10Xfolders <- function( + InputDir, + regex = FALSE, + folderPattern = c("filtered_feature", "raw_feature", "SoupX_decont")[1], + suffix = strsplit(folderPattern, "_")[[1]][1], + depth = 4, + min.cells = 5, min.features = 200, + updateHGNC = TRUE, ShowStats = TRUE, + writeCBCtable = TRUE, + nthreads = .getNrCores(), + preset = "high", + ext = "qs", + sort_alphanumeric = TRUE, + save_empty_droplets = TRUE, + ...) { + + stopifnot( + is.character(InputDir), dir.exists(InputDir), + is.logical(regex), is.character(folderPattern), is.character(suffix), is.numeric(depth), + is.numeric(min.cells), is.numeric(min.features), is.logical(updateHGNC), is.logical(ShowStats), is.logical(writeCBCtable), + is.logical(sort_alphanumeric) + ) + + finOrig <- ReplaceRepeatedSlashes(list.dirs.depth.n(InputDir, depth = depth)) + fin <- CodeAndRoll2::grepv(x = finOrig, pattern = folderPattern, perl = regex) + + message(length(fin), " samples found.") + + samples <- basename(list.dirs(InputDir, recursive = FALSE)) + if (sort_alphanumeric) samples <- gtools::mixedsort(samples) + iprint("Samples:", samples) + + if (!length(fin) > 0) { + stop(paste("No subfolders found with pattern", folderPattern, "in dirs like: ", finOrig[1:3])) + } + + for (i in 1:length(fin)) { + print(i) + pathIN = Stringendo::FixPath(fin[i]); message(pathIN) + fnameIN = basename(dirname(dirname(pathIN))); message(fnameIN) + + count_matrix <- Read10X(pathIN ) + if (!is.list(count_matrix) | length(count_matrix) == 1) { + seu <- CreateSeuratObject( + counts = count_matrix, project = fnameIN, + min.cells = min.cells, min.features = min.features + ) + } else { + ( stop('length(count_matrix) != 1') ) + } + + ncells <- ncol(seu) + fname_X <- Stringendo::sppp( + fnameIN, suffix, "min.cells", min.cells, "min.features", min.features, + "cells", ncells + ) + print(fname_X) + + f.path.out <- Stringendo::ParseFullFilePath(path = InputDir, file_name = fname_X, extension = ext) + message(f.path.out) + + # update --- --- --- + if (updateHGNC) seu <- UpdateGenesSeurat(seu, EnforceUnique = TRUE, ShowStats = TRUE) + + # write out --- --- --- + if (save) qs::qsave(x = seu, file = f.path.out, nthreads = nthreads, preset = preset) + + # write cellIDs --- --- --- + if (writeCBCtable) { + CBCs <- t(t(colnames(seu))) + colnames(CBCs) <- "CBC" + ReadWriter::write.simple.tsv(input_df = CBCs, manual_file_name = sppp(fnameIN, suffix, "CBC"), manual_directory = InputDir) + } + + if(save_empty_droplets & suffix == "raw") { + # Select and save empty droplets (the Soup) + + path_filtered <- gsub(x = pathIN, pattern = "/raw_feature_", replacement = "/filtered_feature_") + fnp_filtered <- spps(path_filtered, "barcodes.tsv.gz") + + + if (file.exists(fnp_filtered)) { + SoupDir <- spps(InputDir, "Soup") + dir.create(SoupDir) + + CBCs_HQ <- read.simple.vec(fnp_filtered) + + CBC_empty_drops <- setdiff(colnames(seu), CBCs_HQ) + nr.empty.droplets <- length(CBC_empty_drops) + umi_per_CBC <- colSums(seu@assays$RNA@layers$counts) + pct.empty.droplets.max10umis <- pc_TRUE(umi_per_CBC<11) + message("We have ", nr.empty.droplets, " empty droplets, ", pct.empty.droplets.max10umis, " of which have max 10 umis." ) + FNM <- sppp("nr.empty.droplets", fnameIN, nr.empty.droplets) + ReadWriter::write.simple.vec(nr.empty.droplets, manual_file_name = FNM, manual_directory = SoupDir) + + obj_empty_drops <- subset(seu, cells = CBC_empty_drops) + + f_path_out_ED <- Stringendo::ParseFullFilePath(path = SoupDir, file_name = sppp("obj.empty.droplets", fnameIN, nr.empty.droplets), extension = ext) + qs::qsave(x = obj_empty_drops, file = f_path_out_ED, nthreads = nthreads, preset = preset) + + # save the bulk RNA counts of the empty droplets + Soup.Bulk.RNA <- rowSums(count_matrix[, CBC_empty_drops]) + f_path_out_Bulk <- Stringendo::ParseFullFilePath(path = SoupDir, file_name = sppp("Soup.Bulk.RNA", fnameIN), extension = 'qs') + qs::qsave(x = Soup.Bulk.RNA, file = f_path_out_Bulk, nthreads = nthreads, preset = preset) + ReadWriter::write.simple.tsv(Soup.Bulk.RNA, suffix = fnameIN, manual_directory = SoupDir) + + } + } else { + message("No empty droplets saved. suffix ", suffix) + } + + + + } # for +} + + + +# _________________________________________________________________________________________________ +#' @title ConvertDropSeqfolders +#' +#' @description This function takes a parent directory with a number of subfolders, each +#' containing the standard output of 10X Cell Ranger. It (1) loads the filtered data matrices, +#' (2) converts them to Seurat objects, and (3) saves them as .RDS files. +#' @param InputDir A character string specifying the input directory. +#' @param folderPattern A character string specifying the pattern of folder names to be searched. Default: 'SRR*'. +#' @param filePattern A character string specifying the pattern of file names to be searched. Default: 'expression.tsv.gz'. +#' @param useVroom A logical value indicating whether to use vroom. Default: `TRUE`.. +#' @param col_types.vroom A list defining column types for vroom. Default: list("GENE" = "c", .default = "d"). +#' @param min.cells An integer value specifying the minimum number of cells. Default: 10. +#' @param min.features An integer value specifying the minimum number of features. Default: 200. +#' @param updateHGNC A logical value indicating whether to update the HGNC. Default: `TRUE`.. +#' @param ShowStats A logical value indicating whether to show statistics. Default: `TRUE`.. +#' @param minDimension An integer value specifying the minimum dimension. Default: 10. +#' @param overwrite A logical value indicating whether to overwrite files. Default: `FALSE`.. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' ConvertDropSeqfolders(InputDir = InputDir) +#' } +#' } +#' @seealso +#' \code{\link[vroom]{vroom}} +#' \code{\link[readr]{read_delim}} +#' @export +#' @importFrom vroom vroom +#' @importFrom readr read_tsv +ConvertDropSeqfolders <- function( + InputDir, + folderPattern = "SRR*", filePattern = "expression.tsv.gz", + useVroom = TRUE, col_types.vroom = list("GENE" = "c", .default = "d"), + min.cells = 10, min.features = 200, updateHGNC = TRUE, ShowStats = TRUE, minDimension = 10, overwrite = FALSE) { + InputDir <- FixPath(InputDir) + fin <- list.dirs(InputDir, recursive = FALSE) + fin <- CodeAndRoll2::grepv(x = fin, pattern = folderPattern, perl = FALSE) + + for (i in 1:length(fin)) { + print(i) + pathIN <- FixPath(fin[i]) + print(pathIN) + fnameIN <- basename(fin[i]) + subdir <- paste0(InputDir, fnameIN) + fnameOUT <- ppp(subdir, "min.cells", min.cells, "min.features", min.features, "Rds") + print(fnameOUT) + if (!overwrite) { + OutFile <- list.files(InputDir, pattern = basename(fnameOUT), recursive = TRUE) + if (length(OutFile) > 0) { + if (grepl(pattern = ".Rds$", OutFile, perl = TRUE)) { + iprint(" RDS OBJECT ALREADY EXISTS.") + next + } + } # if length + } + CountTable <- list.files(subdir, pattern = filePattern, recursive = FALSE) + stopifnot(length(CountTable) == 1) + count_matrix <- if (useVroom) { + vroom::vroom(file = kpps(subdir, CountTable), col_types = col_types.vroom) + } else { + readr::read_tsv(file = kpps(subdir, CountTable)) + } + + if (nrow(count_matrix) < minDimension | ncol(count_matrix) < minDimension) { + iprint("") + iprint(" EXPRESSION MATRIX TOO SMALL.", nrow(count_matrix), "x", ncol(count_matrix), ". Not processed.") + } else { + count_matrix <- FirstCol2RowNames(count_matrix)[, -1] # remove 1st "Cell column" # https://github.com/vertesy/SEO/issues/63 + seu <- CreateSeuratObject( + counts = count_matrix, project = fnameIN, + min.cells = min.cells, min.features = min.features + ) + if (ncol(seu) < 1000) print("Only", ncol(seu), "cells survived filtering in the Seurat obj!") + if (nrow(seu) < 1000) print("Only", nrow(seu), "genes found in the Seurat obj!") + + # update HGNC --- --- --- --- --- + Sys.setenv("R_MAX_VSIZE" = 32000000000) + if (updateHGNC) seu <- UpdateGenesSeurat(seu, EnforceUnique = TRUE, ShowStats = TRUE) + saveRDS(seu, file = fnameOUT) + } + } +} + + +# _________________________________________________________________________________________________ +#' @title LoadAllSeurats +#' +#' @description This function loads all Seurat objects found in a directory. It also works with +#' symbolic links (but not with aliases). +#' @param InputDir A character string specifying the input directory. +#' @param file.pattern A character string specifying the pattern of file names to be searched. +#' Default: '^filtered.+Rds$'. +#' @param string.remove1 A character string or FALSE. If a string is provided, it is removed from +#' file names. Default: "filtered_feature_bc_matrix.". +#' @param string.replace1 A character string of the new text instead of "string.remove1". +#' @param string.remove2 A character string or FALSE. If a string is provided, it is removed from +#' file names. Default: ".min.cells.10.min.features.200.Rds". +#' @param sort_alphanumeric sort files alphanumeric? Default: `TRUE`.. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' ls.Seurat <- LoadAllSeurats(InputDir = InputDir) +#' } +#' } +#' @export +#' @importFrom tictoc tic toc +LoadAllSeurats <- function( + InputDir, + file.pattern = "^filtered.+Rds$", + string.remove1 = list(FALSE, "filtered_feature_bc_matrix.", "raw_feature_bc_matrix.")[[2]], + string.replace1 = "", + string.remove2 = list(FALSE, ".min.cells.10.min.features.200.Rds")[[2]], + sort_alphanumeric = TRUE + ) { + tictoc::tic("LoadAllSeurats") + InputDir <- FixPath(InputDir) + + print(file.pattern) + use_rds <- grepl(pattern = "Rds", x = file.pattern) && !grepl(pattern = "qs", x = file.pattern) + print(use_rds) + + fin.orig <- list.files(InputDir, include.dirs = FALSE, pattern = file.pattern) + print(fin.orig) + print(length(fin.orig)) + stopifnot(length(fin.orig) > 0) + fin <- if (!isFALSE(string.remove1)) sapply(fin.orig, gsub, pattern = string.remove1, replacement = string.replace1) else fin.orig + fin <- if (!isFALSE(string.remove2)) sapply(fin, gsub, pattern = string.remove2, replacement = "") else fin + if (sort_alphanumeric) fin <- gtools::mixedsort(fin) + + + ls.Seu <- list.fromNames(fin) + for (i in 1:length(fin)) { + print(fin[i]) + FNP <- paste0(InputDir, fin.orig[i]) + # print(paste("Attempting to load file:", FNP)) # Debug print + + if (use_rds) { + ls.Seu[[i]] <- readRDS(FNP) + } else if (!use_rds) { + ls.Seu[[i]] <- qs::qread(file = FNP) + } else { + warning("File pattern ambigous. Use either qs or rds:", file.pattern, immediate. = TRUE) + } + } # for + print(tictoc::toc()) + return(ls.Seu) +} + + + + +# _________________________________________________________________________________________________ +#' @title Load 10X Genomics Data as Seurat Object +#' +#' @description Reads 10X Genomics dataset files (gzipped) including matrix, features, and barcodes, +#' to a single expression matrix. This function handles the unzipping of these files, reads the data, +#' and re-compresses the files back to their original gzipped format. +#' +#' @param dir A character string specifying the path to the directory containing the 10X dataset files. +#' This directory should contain `matrix.mtx.gz`, `features.tsv.gz`, and `barcodes.tsv.gz` files. +#' +#' @return A Seurat object containing the single-cell RNA-seq data extracted from the provided 10X +#' Genomics dataset. +#' +#' @details This function facilitates the loading of 10X Genomics datasets into R for analysis with +#' the Seurat package. It specifically caters to gzipped versions of the `matrix.mtx`, `features.tsv`, +#' and `barcodes.tsv` files, automating their decompression, reading, and subsequent recompression. +#' The function relies on Seurat's `Read10X` function for data reading and object construction. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Replace `path_to_10x_data` with the path to your 10X data directory +#' seuratObject <- read10x(dir = "path_to_10x_data") +#' # `seuratObject` is now a Seurat object containing the loaded 10X data +#' } +#' } +#' +#' @note Ensure that the specified directory contains the required gzipped files. +#' If the `features.tsv.gz` file is named differently (e.g., `genes.tsv.gz`), please rename it +#' accordingly before running this function. +#' +#' @seealso \code{\link[Seurat]{Read10X}} for the underlying function used to read the 10X data. +#' +#' @importFrom tictoc tic toc +#' @importFrom R.utils gunzip gzip +#' @importFrom Seurat Read10X +#' +#' @export +read10x <- function(dir) { + tictoc::tic("read10x") + names <- c("barcodes.tsv", "features.tsv", "matrix.mtx") + for (i in 1:length(names)) { + R.utils::gunzip(paste0(dir, "/", names[i], ".gz")) + } + file.copy(paste0(dir, "/features.tsv"), paste0(dir, "/genes.tsv")) + mat <- Seurat::Read10X(dir) + file.remove(paste0(dir, "/genes.tsv")) + for (i in 1:length(names)) { + R.utils::gzip(paste0(dir, "/", names[i])) + } + tictoc::toc() + mat +} + + + +# _________________________________________________________________________________________________ +#' @title .saveRDS.compress.in.BG +#' +#' @description Save and RDS object and compress resulting file in the background using system(gzip). OS X or unix. +#' @param obj Seurat object. +#' @param compress_internally Compress by R? Default: `FALSE`. (still compressed in background via CLI). +#' @param compr Compress at all? Default: `TRUE`. +#' @param fname File name +#' @param ... Additional parameters passed to saveRDS() function. +#' @seealso +#' \code{\link[tictoc]{tic}} +#' @importFrom tictoc tic toc +.saveRDS.compress.in.BG <- function(obj, compr = FALSE, fname, compress_internally = FALSE, ...) { + try(tictoc::tic(".saveRDS.compress.in.BG"), silent = TRUE) + saveRDS(object = obj, compress = compress_internally, file = fname, ...) + try(tictoc::toc(), silent = TRUE) + if (compr) system(command = paste0("gzip '", fname, "'"), wait = FALSE) # execute in the background + print(paste("Saved, optionally being .gz compressed", fname)) + try(say(), silent = TRUE) +} + + + + +# _________________________________________________________________________________________________ +#' @title isave.RDS +#' +#' @description Save an RDS object, using a faster and efficient compression method that runs in the background. +#' @param obj The object to be saved, typically a Seurat object. +#' @param prefix A string prefix added to the filename. Default: NULL. +#' @param suffix A string suffix added to the filename. Default: NULL. +#' @param inOutDir A boolean flag, if TRUE the OutDir is used as save directory, if FALSE the +#' alternative_path_rdata is used. Default: `TRUE`. +#' @param project A string representing the project code. This is appended to the saved file name. +#' Default: the active project determined by getProject(). +#' @param alternative_path_rdata A string that specifies the alternative path for storing the +#' RDS file if inOutDir is FALSE. Default: "~/Dropbox (VBC)/Abel.IMBA/AnalysisD/_RDS.files/" +#' appended with the basename of OutDir. +#' @param homepath A string representing the homepath. Will be replaced by '~' in the file path. Default: '~/'. +#' @param showMemObject A boolean flag, if TRUE the function will print out the memory size of the +#' largest objects in the workspace. Default: `TRUE`.. +#' @param saveParams A boolean flag, if TRUE the parameters 'p' and 'all.genes' are added to the +#' 'misc' slot of the Seurat object if the object is of class Seurat. Default: `TRUE`.. +#' @param compress Compress .Rds file after writing? Default: `TRUE`.. +#' @param test_read Provide command to test validity by reading in the object just written. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' isave.RDS(my.R.object) +#' } +#' } +#' @export +isave.RDS <- function( + obj, prefix = NULL, suffix = NULL, inOutDir = TRUE, + project = getProject(), + alternative_path_rdata = paste0("~/Dropbox (VBC)/Abel.IMBA/AnalysisD/_RDS.files/", basename(OutDir)), + homepath = if (Sys.info()[1] == "Darwin") "~/" else "/users/abel.vertesy/", + showMemObject = TRUE, saveParams = TRUE, + compress = TRUE, + test_read = FALSE) { + warning("isave.RDS() is deprecated. Use xsave() to save in .qs format.", immediate. = TRUE) + path_rdata <- if (inOutDir) OutDir else alternative_path_rdata + dir.create(path_rdata) + + if (showMemObject) { + try(memory.biggest.objects(), silent = TRUE) + } + if ("Seurat" %in% is(obj) & saveParams) { + try(obj@misc$p <- p, silent = TRUE) + try(obj@misc$all.genes <- all.genes, silent = TRUE) + } + fnameBase <- kppu(prefix, substitute(obj), project, suffix, idate(Format = "%Y.%m.%d_%H.%M")) + fnameBase <- trimws(fnameBase, whitespace = "_") + FNN <- paste0(path_rdata, fnameBase, ".Rds") + FNN <- gsub(pattern = "~/", replacement = homepath, x = FNN) + print(FNN) + if (test_read) { + print(paste0('xx5 <- read_rds(\\"', FNN, '\\")')) + } else { + Seurat.utils:::.saveRDS.compress.in.BG(obj = obj, fname = FNN, compr = compress, compress_internally = FALSE) + } +} + +# _________________________________________________________________________________________________ +#' @title Save an R Object Using 'qs' Package for Fast Compressed Saving +#' +#' @description This function saves an R object to a file in a quick and efficient format using the 'qs' package. +#' It constructs the file name based on various inputs and stores additional metadata if the object is a Seurat object. +#' The saving path can be adjusted by the presence of 'OutDir' in the global environment or defaults to the working directory. +#' +#' @param obj The R object to be saved. +#' @param suffix Optional; a suffix to add to the filename. +#' @param prefix Optional; a prefix to add to the filename. +#' @param nthreads Number of threads to use when saving, defaults to 12. +#' @param preset Compression preset, defaults to 'high'. +#' @param project The project name to be included in the filename, defaults to the result of `getProject()`. +#' @param dir Output Directory +#' @param showMemObject Logical; if TRUE, displays the memory size of the largest objects. +#' @param saveParams Logical; if TRUE and if the object is a Seurat object, additional parameters +#' are saved within it. +#' @param paramList Optional; a list of parameters to save within the Seurat object. +#' @param allGenes Optional; a list of all genes to save within the Seurat object. +#' @param saveLocation Logical; if TRUE and if the object is a Seurat object, file location is saved +#' into misc slot. +#' @param backgroundJob NOT IMPLEMENTED. Logical; if TRUE, the compression is done in the background. +#' @param v Verbose output. +#' +#' @return Invisible; The function is called for its side effects (saving a file) and does not return anything. +#' +#' @note The function uses the 'qs' package for quick and efficient serialization of objects and +#' includes a timing feature from the 'tictoc' package. +#' @seealso \code{\link[qs]{qsave}} for the underlying save function used. +#' @importFrom qs qsave +#' @importFrom tictoc tic toc +#' @importFrom job job +#' @importFrom rstudioapi isAvailable +#' +#' @export +xsave <- function( + obj, + suffix = NULL, + prefix = NULL, + nthreads = if (object.size(obj) < 1e7) 1 else .getNrCores(12), + preset = "high", + project = getProject(), + dir = if (exists("OutDir")) OutDir else getwd(), + showMemObject = TRUE, + saveParams = if (exists("p")) TRUE else FALSE, # save allGenes and paramList + paramList = if (exists("p")) p else NULL, + allGenes = if (exists("all.genes")) all.genes else NULL, + saveLocation = TRUE, + backgroundJob = FALSE, + v = TRUE) { + # + if (v) message(nthreads, " threads.\n-----------") + if (v) message("project: ", project) + + # check if the object is a Seurat object + obj_is_seurat <- inherits(obj, "Seurat") + if (obj_is_seurat) { + annot.suffix <- kpp(ncol(obj), "cells") + } else { + saveParams <- FALSE + annot.suffix <- if (is.list(obj)) kppd("ls", length(obj)) else NULL + } + + if(!isFALSE(saveParams)) message("paramList: ", if (exists("paramList")) paste(substitute(paramList), length(paramList), " elements.") else " not provided.") + if(!isFALSE(saveParams)) message("allGenes: ", if (exists("allGenes")) " found as global variable." else " not provided.") + + try(tictoc::tic("xsave"), silent = TRUE) + if (showMemObject & v) try(memory.biggest.objects(), silent = TRUE) + + fnameBase <- trimws(kppu( + prefix, substitute(obj), annot.suffix, suffix, project, + idate(Format = "%Y.%m.%d_%H.%M") + ), whitespace = "_") + + FNN <- paste0(dir, fnameBase, ".qs") + CMND <- paste0(substitute(obj), " <- xread('", FNN, "')") + if (v) message(CMND) + + if ("Seurat" %in% is(obj)) { + if (saveParams) { + if (exists("paramList")) try(obj@misc$"p" <- paramList, silent = TRUE) + if (exists("allGenes")) try(obj@misc$"all.genes" <- allGenes, silent = TRUE) + } + if (saveLocation) try(obj@misc$"file.location" <- CMND, silent = TRUE) + } + + qs::qsave(x = obj, file = FNN, nthreads = nthreads, preset = preset) + + try(tictoc::toc(), silent = TRUE) +} + +# _________________________________________________________________________________________________ +#' @title Read an R Object Using 'qs' Package for Fast Decompression +#' +#' @description This function reads an R object from a file saved in a format specific to the 'qs' package, +#' which is designed for quick and efficient compression and decompression of R objects. +#' It also times the read operation, providing feedback on the duration of the operation. +#' +#' @param file A character string specifying the path to the file where the R object is saved. +#' @param nthreads The number of threads to use when reading the object, defaults to 4. +#' @param loadParamsAndAllGenes Logical; if TRUE and if the object is a Seurat object, additional parameters +#' are loaded from within it. +#' @param overwriteParams Logical; if TRUE and if the object is a Seurat object, the parameters are overwritten. +#' @param overwriteAllGenes Logical; if TRUE and if the object is a Seurat object, the all genes are overwritten. +#' @param set_m Logical; if TRUE, the variable 'm', a list of @meta.data colnames, is assigned to +#' the global environment. +#' @param ... Further arguments passed on to the 'qs::qread' function. +#' +#' @return The R object that was saved in the specified file. +#' @note The function uses the 'qs' package for fast and efficient deserialization of objects +#' and includes a timing feature from the 'tictoc' package. +#' +#' @seealso \code{\link[qs]{qread}} for the underlying read function used. +#' @importFrom qs qread +#' @importFrom tictoc tic toc +#' @importFrom rstudioapi isAvailable +#' +#' @export +xread <- function(file, + nthreads = if (file.size(file) < 1e7) 1 else 4, + loadParamsAndAllGenes = TRUE, + overwriteParams = FALSE, + overwriteAllGenes = FALSE, + set_m = TRUE, + ...) { + stopifnot(file.exists(file)) + + message(nthreads, " threads.") + try(tictoc::tic("xread"), silent = TRUE) + + obj <- qs::qread(file = file, nthreads = nthreads, ...) + + report <- if (is(obj, "Seurat")) { + kppws("with", ncol(obj), "cells &", ncol(obj@meta.data), "meta colums.") + } else if (is.list(obj)) { + kppws("is a list of:", length(obj)) + } else { + kppws("of length:", length(obj)) + } + + + if ("Seurat" %in% is(obj)) { + if (loadParamsAndAllGenes) { + p_local <- obj@misc$"p" + all.genes_local <- obj@misc$"all.genes" + + if (is.null(p_local)) { + message("No parameter list 'p' found in object@misc.") + } else { + recall.parameters(obj = obj, overwrite = overwriteParams) + } + + if (is.null(all.genes_local)) { + message("No gene list 'all.genes' found in object@misc.") + } else { + recall.all.genes(obj = obj, overwrite = overwriteAllGenes) + } + } # loadParamsAndAllGenes + + if (set_m) { + # if (!exists("m")) { + # m <- list.fromNames(colnames(obj@meta.data)) + m <- lapply(data.frame(obj@meta.data), function(x) head(unique(x), 50)) + assign("m", m, envir = .GlobalEnv) + message("Variable 'm', a list of @meta.data colnames and first 50 uq values, is now defined in the global environment.") + # } else { + # message("Variable 'm' already exists in the global environment, not overwritten") + # } # exists("m") + } # set_m + } # Seurat + + + iprint(is(obj)[1], report) + try(tictoc::toc(), silent = TRUE) + invisible(obj) +} + + + +# _________________________________________________________________________________________________ +# Save workspace +# requires MarkdownReports (github) and defining OutDir +# requires github/vertesy/CodeAndRoll.r + +#' @title isave.image +#' +#' @description Save an image of the current workspace using a faster and efficient compression +#' method that runs in the background. +#' @param ... Additional parameters passed to the idate() function in the creation of the file name. +#' @param path_rdata A string that specifies the path for storing the image of the workspace. +#' Default: "~/Dropbox/Abel.IMBA/AnalysisD/_Rdata.files/" appended with the basename of OutDir. +#' @param showMemObject A boolean flag, if TRUE the function will print out the memory size of the +#' largest objects in the workspace. Default: `TRUE`.. +#' @param options A string for gzip options. Default: "--force". +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' isave.image(my.R.image) +#' } +#' } +#' @export +#' @importFrom Stringendo kollapse iprint +isave.image <- function( + ..., path_rdata = paste0("~/Dropbox/Abel.IMBA/AnalysisD/_Rdata.files/", basename(OutDir)), + showMemObject = TRUE, options = c("--force", NULL)[1]) { + dir.create(path_rdata) + + if (showMemObject) { + try(memory.biggest.objects(), silent = TRUE) + } + fname <- Stringendo::kollapse(path_rdata, "/", idate(), ..., ".Rdata") + print(fname) + if (nchar(fname) > 2000) stop() + save.image(file = fname, compress = FALSE) + iprint("Saved, being compressed", fname) + system(paste("gzip", options, fname), wait = FALSE) # execute in the background +} + + +# _________________________________________________________________________________________________ +#' @title Save workspace - qsave.image +#' +#' @description Faster saving of workspace, and compression outside R, when it can run in the background. +#' Seemingly quite CPU hungry and not very efficient compression. # +#' @param ... Pass any other parameter to the internally called functions (most of them should work). +#' @param showMemObject Logical; if TRUE, the function will print out the memory size of the largest +#' objects in the workspace. Default: `TRUE`.. +#' @param options Options passed on to gzip, via CLI. Default: `c("--force", NULL)[1]` +#' @seealso +#' \code{\link[Stringendo]{kollapse}}, \code{\link[function]{iprint}} +#' @export +#' @importFrom Stringendo kollapse iprint +#' @importFrom tictoc tic toc +qsave.image <- function(..., showMemObject = TRUE + , options = c("--force", NULL)[1] + ) { + tictoc::tic("qsave.image") + + fname <- Stringendo::kollapse(getwd(), "/", basename(OutDir), idate(), ..., ".Rdata") + print(fname) + if (nchar(fname) > 2000) stop() + save.image(file = fname, compress = FALSE) + iprint("Saved, being compressed", fname) + system(paste("gzip", options, fname), wait = FALSE) # execute in the background + cat(tictoc::toc) +} + + +# _________________________________________________________________________________________________ +#' @title Find 'Outs' Subdirectories in Specified Subdirectories +#' +#' @description This function searches through specified subdirectories within a root directory +#' to find all subdirectories named 'outs' and returns a character vector with their full paths. +#' +#' @param root_dir The root directory. +#' @param subdir A character vector of subdirectory names within the root directory to be scanned. +#' @param recursive Boolean indicating whether to search recursively within subdirectories. +#' @return A character vector containing the full paths to the 'outs' subdirectories. +#' @importFrom fs dir_ls +#' @export +find10XoutputFolders <- function(root_dir, subdir, recursive = TRUE) { + stopifnot( + is.character(root_dir), length(root_dir) == 1, dir.exists(root_dir), + is.character(subdir), all(dir.exists(file.path(root_dir, subdir))), + is.logical(recursive) + ) + + outs_dirs <- c() + for (i in seq_along(subdir)) { + path <- file.path(root_dir, subdir[i]) + printProgress(i, length(subdir), "Processing subdirectory") + + iprint("Searching in:", path) + found_dirs <- fs::dir_ls(path, recurse = recursive, glob = "*/outs", type = "directory") + iprint(length(found_dirs), "output folders found.") + outs_dirs <- c(outs_dirs, found_dirs) + } + + # Replace root_dir in the paths with an empty string for printing + outs_print <- gsub(paste0("^", root_dir, "/?"), "", outs_dirs) + iprint(length(outs_dirs), outs_print) + + return(outs_dirs) +} + + +# _________________________________________________________________________________________________ +#' @title Clip Suffixes from 10X Cell Names +#' +#' @description Removes suffixes from cell names that are added by 10X technology and Seurat during data processing. +#' +#' @param cellnames A vector of cell names with potential suffixes. +#' @return A vector of cell names with suffixes removed. +#' @examples +#' cellnames <- c("cell1_1", "cell2_2") +#' clip10Xcellname(cellnames) +#' @export +#' @importFrom stringr str_split_fixed +clip10Xcellname <- function(cellnames) { + stringr::str_split_fixed(cellnames, "_", n = 2)[, 1] +} + +# _________________________________________________________________________________________________ +#' @title Add Suffix to Cell Names (e.g. lane suffix: _1) +#' +#' @description Appends a specified suffix to cell names to mimic lane suffixes used in 10X datasets. +#' +#' @param cellnames A vector of cell names without numeric suffixes. +#' @param suffix The suffix to add to each cell name. Default: '_1'. +#' @return A vector of cell names with the specified suffix appended. +#' @examples +#' cellnames <- c("cell1", "cell2") +#' make10Xcellname(cellnames) +#' @export +make10Xcellname <- function(cellnames, suffix = "_1") { + paste0(cellnames, suffix) +} + + + +# _________________________________________________________________________________________________ +# Soup.Analysis.of.ambient.RNA.R ______________________________ ---- +# _________________________________________________________________________________________________ +# source('~/GitHub/Packages/Seurat.utils/Functions/Soup.Analysis.of.ambient.RNA.R') +# try (source('https://raw.githubusercontent.com/vertesy/Seurat.utils/master/Functions/Soup.Analysis.of.ambient.RNA.R')) +# Source: self + web + + + +# _________________________________________________________________________________________________ +#' @title plotTheSoup +#' +#' @description Plot stats about the ambient RNA content in a 10X experiment. +#' +#' @param CellRanger_outs_Dir CellRanger 'outs' (output) directory, Default: '~/Data/114593/114593' +#' @param library_name Aka SampleName (the folder above 'outs;). +#' @param out_dir_prefix Prefix for the output directory. Default: 'SoupStatistics' +#' @param add_custom_class Add a custom class of genes, matched by apattern in gene symbol. Default: `TRUE`. +#' @param pattern_custom The pattern to match in gene symbol. Default: `NA`. +#' @param ls.Alpha The alpha value for the label text. Default: 0.5. +#' +#' @seealso +#' \code{\link[Matrix]{colSums}} +#' \code{\link[tibble]{rownames}} +#' \code{\link[ggrepel]{geom_label_repel}} +#' +#' @importFrom Matrix rowSums +#' @importFrom tibble rownames_to_column +#' @importFrom ggrepel geom_text_repel +#' @importFrom Stringendo percentage_formatter +#' @importFrom MarkdownReports wbarplot create_set_OutDir +#' @importFrom MarkdownHelpers ww.assign_to_global +#' @importFrom dplyr as_tibble +#' +#' @export +plotTheSoup <- function(CellRanger_outs_Dir = "~/Data/114593/114593", + # library_name = str_extract(CellRanger_outs_Dir, "[[:alnum:]_]+(?=/outs/)"), + library_name = basename(gsub("/outs","",CellRanger_outs_Dir)), + out_dir_prefix = 'SoupStatistics', + add_custom_class = F, pattern_custom = "\\.RabV$", + ls.Alpha = 1) { + + iprint("library_name:", library_name) + + stopifnot( # Check input + is.character(CellRanger_outs_Dir), dir.exists(CellRanger_outs_Dir), + nchar(library_name) > 4, is.character(out_dir_prefix), nchar(out_dir_prefix) > 0, + is.numeric(ls.Alpha) + ) + + if(add_custom_class) iprint("pattern_custom:", pattern_custom) + + # The regular expression `[[:alnum:]_]+(?=/outs/)` matches one or more alphanumeric characters or + # underscores that are followed by the `/outs/` portion in the string. It ensures that the desired + # substring is captured, but it does not include the `/outs/` in the matched result. + # `[[:alnum:]_]+` matches one or more alphanumeric characters or underscores. The `[:alnum:]` + # character class represents all alphabetic characters (both uppercase and lowercase) and digits. + # The underscore character `_` is included as well. The `+` quantifier specifies that there should + # be one or more occurrences of these characters in a row. + # `(?=/outs/)` This is a positive lookahead assertion. It matches a position in the string where + # `/outs/` is present immediately after. It doesn't consume any characters from the string; it + # just checks for the presence of `/outs/` after the matched substring. + + + Subfolders_10X_outs <- list.dirs(CellRanger_outs_Dir, full.names = FALSE, recursive = FALSE) + stopifnot(length(Subfolders_10X_outs) > 0) + + # Identify raw and filtered files ___________________________________ + path.raw <- file.path(CellRanger_outs_Dir, grep(x = Subfolders_10X_outs, pattern = "^raw_*", value = TRUE)) + path.filt <- file.path(CellRanger_outs_Dir, grep(x = Subfolders_10X_outs, pattern = "^filt_*", value = TRUE)) + CR.matrices <- list.fromNames(c("raw", "filt")) + + # Adapter for Markdownreports background variable "OutDir" + OutDirBac <- if (exists("OutDir")) OutDir else getwd() + OutDir <- file.path(CellRanger_outs_Dir, paste0(kpp(out_dir_prefix, library_name))) + + MarkdownReports::create_set_OutDir(OutDir) + MarkdownHelpers::ww.assign_to_global("OutDir", OutDir, 1) + + # Read raw and filtered data ___________________________________ + print("Reading raw CellRanger output matrices") + CR.matrices$"raw" <- Seurat::Read10X(path.raw) + if (length(CR.matrices$"raw") == 2) { + CR.matrices$"raw" <- CR.matrices$"raw"[[1]] + } # Maybe AB table is present too at slot 2! + + + print("Reading filtered CellRanger output matrices") + CR.matrices$"filt" <- Seurat::Read10X(path.filt) + if (length(CR.matrices$"filt") == 2) { + CR.matrices$"filt" <- CR.matrices$"filt"[[1]] + } # Maybe AB table is present too at slot 2! + + + # Profiling the soup ___________________________________ + print("Profiling the soup") + GEMs.all <- CR.matrices$"raw"@Dimnames[[2]] + GEMs.cells <- CR.matrices$"filt"@Dimnames[[2]] + iprint("There are", length(GEMs.all), "GEMs sequenced, and", length(GEMs.cells), "are cells among those.") + EmptyDroplets.and.Cells <- c("EmptyDroplets" = length(GEMs.all) - length(GEMs.cells), "Cells" = length(GEMs.cells)) + ggExpress::qbarplot(EmptyDroplets.and.Cells, label = EmptyDroplets.and.Cells, palette_use = "npg", col = 1:2, ylab = "GEMs") + + GEMs.soup <- setdiff(GEMs.all, GEMs.cells) + CR.matrices$"soup" <- CR.matrices$"raw"[, GEMs.soup] + CR.matrices$"soup.total.RC" <- Matrix::rowSums(CR.matrices$"soup") + CR.matrices$"soup.total.sum" <- sum(CR.matrices$"soup") + CR.matrices$"cells.total.sum" <- sum(CR.matrices$"filt") + + CR.matrices$"soup.rel.RC" <- CR.matrices$"soup.total.RC" / CR.matrices$"soup.total.sum" + + # Diff Exp ___________________________________ + Soup.VS.Cells.Av.Exp <- cbind( + "Soup" = Matrix::rowSums(CR.matrices$"soup"), + "Cells" = Matrix::rowSums(CR.matrices$"filt") + ) + colnames(Soup.VS.Cells.Av.Exp) + idx.HE <- rowSums(Soup.VS.Cells.Av.Exp) > 10 + pc_TRUE(idx.HE) + Soup.VS.Cells.Av.Exp <- Soup.VS.Cells.Av.Exp[idx.HE, ] + idim(Soup.VS.Cells.Av.Exp) + Soup.VS.Cells.Av.Exp.log10 <- log10(Soup.VS.Cells.Av.Exp + 1) + + # ggplot prepare ___________________________________ + Soup.VS.Cells.Av.Exp.gg <- tibble::rownames_to_column(as.data.frame(Soup.VS.Cells.Av.Exp.log10), "gene") + (Soup.VS.Cells.Av.Exp.gg <- dplyr::as_tibble(Soup.VS.Cells.Av.Exp.gg)) + soup.rate <- Soup.VS.Cells.Av.Exp.gg$Soup / (Soup.VS.Cells.Av.Exp.gg$Cells + Soup.VS.Cells.Av.Exp.gg$Soup) + cell.rate <- Soup.VS.Cells.Av.Exp.gg$Cells / (Soup.VS.Cells.Av.Exp.gg$Cells + Soup.VS.Cells.Av.Exp.gg$Soup) + + axl.pfx <- "Total Expression in" + axl.sfx <- "[log10(mRNA+1)]" + + HGNC <- Soup.VS.Cells.Av.Exp.gg$gene + Class <- rep("Other", times = nrow(Soup.VS.Cells.Av.Exp.gg)) + Class[grep("^RPL|^RPS", HGNC)] <- "RP" + Class[grep("^MT-", HGNC)] <- "MT" + Class[grep("^LINC", HGNC)] <- "LINC" + Class[grep("^AC", HGNC)] <- "AC" + Class[grep("^AL", HGNC)] <- "AL" + if (add_custom_class) Class[grep(pattern_custom, HGNC)] <- ReplaceSpecialCharacters(pattern_custom, remove_dots = T) + Nr.of.Genes.per.Class <- table(Class) + + + ggExpress::qpie(Nr.of.Genes.per.Class) + Soup.VS.Cells.Av.Exp.gg$Class <- Class + + fname <- kpp("Soup.VS.Cells.Av.Exp.GeneClasses", library_name, "pdf") + pgg <- + ggplot( + Soup.VS.Cells.Av.Exp.gg |> + arrange(-nchar(Class)), aes(x = Soup, y = Cells, label = gene, col = Class) + ) + + geom_abline(slope = 1, col = "darkgrey") + + geom_point() + + scale_alpha_manual(guide = "none", values = ls.Alpha) + + xlab(paste(axl.pfx, "Soup", axl.sfx)) + + ylab(paste(axl.pfx, "Cells", axl.sfx)) + + ggtitle("Soup VS. Cells | gene classes") + + ggsave(pgg, filename = file.path(OutDir, fname), width = 7, height = 7) + + # ggplot ___________________________________ + quantiles <- c(0.025, 0.01, 0.0025) + + i <- 1 + for (i in 1:length(quantiles)) { + pr <- quantiles[i] + print(pr) + HP.thr <- 200 * pr / quantiles[2] + idx.HE2 <- rowSums(Soup.VS.Cells.Av.Exp) > HP.thr + pc_TRUE(idx.HE2) + + fname <- kpp("Soup.VS.Cells.Av.Exp.quantile", pr, library_name, "pdf") + + Outlier <- idx.HE2 & + (cell.rate < quantile(cell.rate, probs = pr) | + soup.rate < quantile(soup.rate, probs = pr)) + + pc_TRUE(Outlier) + sum(Outlier) + HP.thr.mod <- HP.thr + while (sum(Outlier) > 40) { + HP.thr.mod <- HP.thr.mod * 2 + Outlier <- Outlier & rowSums(Soup.VS.Cells.Av.Exp) > HP.thr.mod + } + + pgg <- + ggplot(Soup.VS.Cells.Av.Exp.gg, aes( + x = Soup, y = Cells, label = gene, + col = Outlier + )) + + geom_point() + + theme(legend.position = "none") + + xlab(paste(axl.pfx, "Soup", axl.sfx)) + + ylab(paste(axl.pfx, "Cells", axl.sfx)) + + ggtitle("Soup VS. Cells", subtitle = pr) + + ggrepel::geom_text_repel(aes(label = ifelse(Outlier, + as.character(gene), "" + ))) + ggsave(pgg, filename = file.path(OutDir, fname), width = 7, height = 7) + + + } # for + + + # Per Gene ___________________________________ + PC.mRNA.in.Soup <- sum(CR.matrices$"soup") / sum(CR.matrices$"raw") + PC.mRNA.in.Cells <- 100 * sum(CR.matrices$"filt") / sum(CR.matrices$"raw") + MarkdownReports::wbarplot( + variable = PC.mRNA.in.Cells, col = "seagreen", plotname = kppd("PC.mRNA.in.Cells", library_name), + ylim = c(0, 100), ylab = "% mRNA in cells", + sub = "% mRNA is more meaningful than % reads reported by CR" + ) + barplot_label( + barplotted_variable = PC.mRNA.in.Cells, + labels = Stringendo::percentage_formatter(PC.mRNA.in.Cells / 100, digitz = 2), + TopOffset = 10 + ) + + + # Plot top gene's expression ___________________________________ + Soup.GEMs.top.Genes <- 100 * head(sort(CR.matrices$"soup.rel.RC", decreasing = TRUE), n = 20) + + MarkdownReports::wbarplot(Soup.GEMs.top.Genes, + plotname = kppd("Soup.GEMs.top.Genes", library_name), + ylab = "% mRNA in the Soup", + sub = paste("Within the", library_name, "dataset"), + tilted_text = TRUE, + ylim = c(0, max(Soup.GEMs.top.Genes) * 1.5) + ) + barplot_label( + barplotted_variable = Soup.GEMs.top.Genes, + labels = Stringendo::percentage_formatter(Soup.GEMs.top.Genes / 100, digitz = 2), + TopOffset = -.5, srt = 90, cex = .75 + ) + + # Plot summarize expression ___________________________________ + soupProfile <- CR.matrices$"soup.total.RC" + { + soup.RP.sum <- sum(soupProfile[grep("^RPL|^RPS", names(soupProfile))]) + soup.RPL.sum <- sum(soupProfile[grep("^RPL", names(soupProfile))]) + soup.RPS.sum <- sum(soupProfile[grep("^RPS", names(soupProfile))]) + soup.mito.sum <- sum(soupProfile[grep("^MT-", names(soupProfile))]) + soup.LINC.sum <- sum(soupProfile[grep("^LINC", names(soupProfile))]) + soup.AC.sum <- sum(soupProfile[grep("^AC", names(soupProfile))]) + soup.AL.sum <- sum(soupProfile[grep("^AL", names(soupProfile))]) + genes.non.Above <- soupProfile[CodeAndRoll2::grepv("^RPL|^RPS|^MT-|^LINC|^AC|^AL", names(soupProfile), invert = TRUE)] + } + head(sort(genes.non.Above), n = 50) + + + soupProfile.summarized <- c( + "Mitochondial" = soup.mito.sum, + "Ribosomal" = soup.RP.sum, + "Ribosomal.L" = soup.RPL.sum, + "Ribosomal.S" = soup.RPS.sum, + "GenBank (AC)" = soup.AC.sum, + "EMBL (AL)" = soup.AL.sum, + "LINC" = soup.LINC.sum, + sort(genes.non.Above, decreasing = TRUE) + ) + NrColumns2Show <- min(10, nrow(soupProfile.summarized)) + ccc <- c("#FF4E00", "#778B04", "#8ea604", "#8ea604", "#F5BB00", "#F5BB00", "#EC9F05", rep(x = "#BF3100", times = NrColumns2Show - 6)) + + + Soup.GEMs.top.Genes.summarized <- 100 * soupProfile.summarized[1:NrColumns2Show] / CR.matrices$"soup.total.sum" + maxx <- max(Soup.GEMs.top.Genes.summarized) + MarkdownReports::wbarplot(Soup.GEMs.top.Genes.summarized, + plotname = kppd("Soup.GEMs.top.Genes.summarized", library_name), + ylab = "% mRNA in the Soup", ylim = c(0, maxx + 3), + sub = paste("Within the", library_name, "dataset"), + tilted_text = TRUE, col = ccc + ) + barplot_label( + barplotted_variable = Soup.GEMs.top.Genes.summarized, + srt = 45, labels = Stringendo::percentage_formatter(Soup.GEMs.top.Genes.summarized / 100, digitz = 2), + TopOffset = -1.5 + ) + + # Absolute.fraction ___________________________________ + Absolute.fraction.soupProfile.summarized <- Soup.GEMs.top.Genes.summarized * PC.mRNA.in.Soup + + maxx <- max(Absolute.fraction.soupProfile.summarized) + MarkdownReports::wbarplot(Absolute.fraction.soupProfile.summarized, + plotname = kppd("Absolute.fraction.soupProfile.summarized", library_name), + ylab = "% of mRNA in cells", ylim = c(0, maxx * 1.33), + sub = paste(Stringendo::percentage_formatter(PC.mRNA.in.Soup), "of mRNA counts are in the Soup, in the dataset ", library_name), + tilted_text = TRUE, col = ccc + ) + barplot_label( + barplotted_variable = Absolute.fraction.soupProfile.summarized, + srt = 45, labels = Stringendo::percentage_formatter(Absolute.fraction.soupProfile.summarized / 100, digitz = 2) + # formatC(Absolute.fraction.soupProfile.summarized, format="f", big.mark = " ", digits = 0) + , TopOffset = -maxx * 0.15 + ) + + # ___________________________________ + Soup.GEMs.top.Genes.non.summarized <- 100 * sort(genes.non.Above, decreasing = TRUE)[1:20] / CR.matrices$"soup.total.sum" + maxx <- max(Soup.GEMs.top.Genes.non.summarized) + MarkdownReports::wbarplot(Soup.GEMs.top.Genes.non.summarized, + plotname = kppd("Soup.GEMs.top.Genes.non.summarized", library_name), + ylab = "% mRNA in the Soup", + sub = paste("Within the", library_name, "dataset"), + tilted_text = TRUE, col = "#BF3100", + ylim = c(0, maxx * 1.5) + ) + barplot_label( + barplotted_variable = Soup.GEMs.top.Genes.non.summarized, + labels = Stringendo::percentage_formatter(Soup.GEMs.top.Genes.non.summarized / 100, digitz = 2), + TopOffset = -maxx * 0.2, srt = 90, cex = .75 + ) + + if (exists("OutDirBac")) MarkdownHelpers::ww.assign_to_global("OutDir", OutDirBac, 1) +} # plotTheSoup + + + + + +# _________________________________________________________________________________________________ +# Jaccard.toolkit _____________________________ ---- +# _________________________________________________________________________________________________ +# try(source('~/GitHub/Packages/Seurat.utils/Functions/Jaccard.toolkit.R')) +# try(source('https://raw.githubusercontent.com/vertesy/Seurat.utils/master/Functions/Jaccard.toolkit.R')) + + +# __________________________________________ +# Fast direct calculation from a list + + +# _________________________________________________________________________________________________ +#' @title jJaccardIndexVec +#' +#' @description Calculate jaccard similarity for 2 vecotrs. Helper to jPairwiseJaccardIndexList. +#' @param A Set A, Default: 1:3 +#' @param B Set B, Default: 2:4 +#' @export +jJaccardIndexVec <- function(A = 1:3, B = 2:4) length(intersect(A, B)) / length(union(A, B)) + +# _________________________________________________________________________________________________ +#' @title jPairwiseJaccardIndexList +#' +#' @description Create a pairwise jaccard similarity matrix across all combinations of columns in +#' binary.presence.matrix. Modified from: +#' https://www.displayr.com/how-to-calculate-jaccard-coefficients-in-displayr-using-r/ +#' @param lsG List of genes, Default: ls_genes +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' jPairwiseJaccardIndexList(lsG = ls_genes) +#' } +#' } +#' @export +#' @importFrom Stringendo percentage_formatter +jPairwiseJaccardIndexList <- function(lsG = ls_genes) { + if (length(names(lsG)) < length(lsG)) { + iprint("Gene lists were not (all) named, now renamed as:") + names(lsG) <- ppp("dataset", 1:length(lsG)) + print(names(lsG)) + } + m <- matrix.fromNames(rowname_vec = names(lsG), colname_vec = names(lsG)) + n.sets <- length(lsG) + for (r in 1:n.sets) { + # print(Stringendo::percentage_formatter(r/n.sets)) + for (c in 1:n.sets) { + if (c == r) { + m[r, c] <- 1 + } else { + m[r, c] <- signif(jJaccardIndexVec(lsG[[r]], lsG[[c]]), digits = 2) + } + } + } + return(m) +} + + +# Much slower Indirect calculation via PresenceMatrix +# _________________________________________________________________________________________________ + +# _________________________________________________________________________________________________ +#' @title jPresenceMatrix +#' +#' @description Make a binary presence matrix from a list. Source: +#' https://stackoverflow.com/questions/56155707/r-how-to-create-a-binary-relation-matrix-from-a-list-of-strings # +#' @param string_list List of strings to compare overlapping entries. +#' Default: lst(a = 1:3, b = 2:5, c = 4:9, d = -1:4) +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' df.presence <- jPresenceMatrix(string_list = lst(a = 1:3, b = 2:5, c = 4:9, d = -1:4)) +#' } +#' } +#' @export +jPresenceMatrix <- function(string_list = lst(a = 1:3, b = 2:5, c = 4:9, d = -1:4)) { + df.presence <- string_list |> + enframe() |> + unnest(cols = "value") |> + count(name, value) |> + spread(value, n, fill = 0) + df.presence2 <- FirstCol2RowNames(df.presence) + return(t(df.presence2)) +} + + +# _________________________________________________________________________________________________ +#' @title jJaccardIndexBinary +#' +#' @description Calculate Jaccard Index. Modified from: +#' https://www.displayr.com/how-to-calculate-jaccard-coefficients-in-displayr-using-r/ # +#' @param x Set X +#' @param y Set Y +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' JaccardSimilarity <- jJaccardIndexBinary( +#' x = sample(x = 0:1, size = 100, replace = TRUE), +#' y = sample(x = 0:1, size = 100, replace = TRUE) +#' ) +#' } +#' } +#' @export +jJaccardIndexBinary <- function(x, y) { + elements.found <- sort(unique(union(x, y))) + stopifnot(length(elements.found) == 2) # check if you only have [0,1] + stopifnot(as.numeric(elements.found) == 0:1) # check if you only have [0,1] + + M.11 <- sum(x == 1 & y == 1) + M.10 <- sum(x == 1 & y == 0) + M.01 <- sum(x == 0 & y == 1) + return(M.11 / (M.11 + M.10 + M.01)) +} + + + +# _________________________________________________________________________________________________ +#' @title jPairwiseJaccardIndex +#' +#' @description Create a pairwise jaccard similarity matrix across all combinations of columns in +#' binary.presence.matrix. Modified from: +#' https://www.displayr.com/how-to-calculate-jaccard-coefficients-in-displayr-using-r/ +#' @param binary.presence.matrix A boolean matrix. Default: df.presence +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' PairwiseJaccardIndices <- jPairwiseJaccardIndex(binary.presence.matrix = df.presence) +#' } +#' } +#' @export +#' @importFrom Stringendo percentage_formatter +jPairwiseJaccardIndex <- function(binary.presence.matrix = df.presence) { + m <- matrix.fromNames(rowname_vec = colnames(binary.presence.matrix), colname_vec = colnames(binary.presence.matrix)) + n.sets <- ncol(binary.presence.matrix) + for (r in 1:n.sets) { + print(Stringendo::percentage_formatter(r / n.sets)) + for (c in 1:n.sets) { + if (c == r) { + m[r, c] <- 1 + } else { + m[r, c] <- signif(jJaccardIndexBinary(binary.presence.matrix[, r], binary.presence.matrix[, c]), digits = 2) + } + } + } + return(m) +} + + +# _________________________________________________________________________________________________ +# Variable Features _____________________________ ------ +# _________________________________________________________________________________________________ + +#' @title Compare variable features and their ranks in two Seurat objects. +#' +#' @description This function compares variable features (genes) between two Seurat objects, +#' reporting the number of genes in each, the percentage of common genes, the percentage +#' of unique genes in each object, and the similarity in the ranking of overlapping genes +#' using Spearman's rank correlation coefficient. Optionally, it can also generate a scatterplot +#' of the ranks of common genes using ggpubr's ggscatter. The function returns the common genes +#' and the Spearman's rank correlation coefficient. +#' +#' @param obj1 The first Seurat object for comparison. Default: NULL. +#' @param obj2 The second Seurat object for comparison. Default: NULL. +#' @param cor.plot An optional boolean indicating whether to generate a scatterplot of the ranks +#' of common genes. Default: `FALSE`.. +#' @param plot_venn plot_venn +#' @param suffix suffix +#' @param save.plot save.plot +#' @return A list containing the common genes and Spearman's rank correlation coefficient. +#' If cor.plot is TRUE, a scatterplot is also generated. +#' @importFrom Seurat VariableFeatures +#' @importFrom stats cor +#' @importFrom ggExpress qvenn qscatter +#' @examples +#' # Assuming obj1 and obj2 are Seurat objects +#' result <- compareVarFeaturesAndRanks(obj1, obj2, cor.plot = TRUE) +#' @export +compareVarFeaturesAndRanks <- function( + obj1 = NULL, obj2 = NULL, cor.plot = TRUE, save.plot = TRUE, + plot_venn = TRUE, + suffix = NULL, + ...) { + stopifnot(!is.null(obj1), !is.null(obj2)) + stopifnot(is(obj1, "Seurat"), is(obj2, "Seurat")) + + name1 <- deparse(substitute(obj1)) + name2 <- deparse(substitute(obj2)) + + var.genes1 <- Seurat::VariableFeatures(obj1) + var.genes2 <- Seurat::VariableFeatures(obj2) + + if (plot_venn) { + variable.genes.overlap <- list(var.genes1, var.genes2) + names(variable.genes.overlap) <- c(name1, name2) + ggExpress::qvenn(list = variable.genes.overlap, suffix = sppp(suffix, c(name1, name2))) + } + + nr_genes1 <- length(var.genes1) + nr_genes2 <- length(var.genes2) + common_genes <- intersect(var.genes1, var.genes2) + percent_common <- length(common_genes) / max(nr_genes1, nr_genes2) * 100 + percent_uniq1 <- (nr_genes1 - length(common_genes)) / nr_genes1 * 100 + percent_uniq2 <- (nr_genes2 - length(common_genes)) / nr_genes2 * 100 + + ranks1 <- match(common_genes, var.genes1) + ranks2 <- match(common_genes, var.genes2) + + spearman_correlation <- cor(ranks1, ranks2, method = "spearman") + + stopifnot(is.numeric(spearman_correlation)) + + cat(sprintf("Nr of genes in obj1: %d\n", nr_genes1)) + cat(sprintf("Nr of genes in obj2: %d\n", nr_genes2)) + cat(sprintf("%% Common genes: %.2f%%\n", percent_common)) + cat(sprintf("%% Unique genes in obj1: %.2f%%\n", percent_uniq1)) + cat(sprintf("%% Unique genes in obj2: %.2f%%\n", percent_uniq2)) + cat(sprintf("Spearman's rank correlation: %.2f\n", spearman_correlation)) + + if (cor.plot) { + plot_data <- data.frame(ranks1, ranks2) + colnames(plot_data) <- paste("Rank in", c(name1, name2)) + TTL <- paste("Spearman Rank Correlation of Shared Variable Genes") + + SUB <- paste( + "between objects:", name1, "&", name2, "\n", + length(common_genes), "or", percent_common, "% overlap from objects:", + nr_genes1, "&", nr_genes2, "genes." + ) + CPT <- paste("median ranks:", median(ranks1), "/", median(ranks2)) + file_name <- paste0( + "Spearman_Rank_Correlation_of_", + name1, "_and_", name2, + "_", sprintf("%.2f", spearman_correlation), ".png" + ) + print(head(plot_data)) + plt <- ggExpress::qscatter( + df_XYcol = plot_data, + plotname = TTL, + subtitle = SUB, + caption = CPT, + # abline = c(0,1), + save = save.plot, + filename = file_name, + correlation_r2 = TRUE, + also.pdf = FALSE, + cor.coef = TRUE, cor.method = "spearman", + ... + ) + print(plt) + } + + unique.genes <- symdiff(var.genes1, var.genes2) + names(unique.genes) <- paste0("Unique.", c(name1, name2)) + return(list( + "common_genes" = common_genes, + "unique.genes" = unique.genes, + "spearman_correlation" = spearman_correlation + )) +} + + +# _________________________________________________________________________________________________ +# Helper Functions _____________________________ ------ +# _________________________________________________________________________________________________ + + + +# _________________________________________________________________________________________________ +#' @title Get the number of CPUs to use for CBE processing +#' +#' @description This function checks for the presence of a global `CBE.params` list and, +#' if found and contains a `cpus` entry, returns the number of CPUs specified by `cpus` minus one. +#' Otherwise, it returns a default number of CPUs. +#' +#' @param n.cpus.def The default number of CPUs to return if `CBE.params` does not exist +#' or does not contain a `cpus` entry. Defaults to 8. +#' +#' @return The number of CPUs to use for CBE processing. If `CBE.params$cpus` is set, +#' returns `CBE.params$cpus - 1`, ensuring at least 1 CPU is returned. Otherwise, returns `n.cpus.def`. +#' +#' @examples +#' # Assuming CBE.params does not exist or does not have a `cpus` entry +#' getCPUsCBE() # returns 8 by default +#' +#' # Assuming CBE.params exists and has a `cpus` entry of 4 +#' getCPUsCBE() # returns 3 +#' @export +#' +.getNrCores <- function(n.cpus.def = 8) { + # n_cores_detected <- as.numeric(system("nproc", intern = TRUE)) + n_cores_detected <- as.numeric(system("echo $SLURM_CPUS_PER_TASK", intern = TRUE)) + n_cores_avail <- min(n_cores_detected, n.cpus.def) + return(max(n_cores_avail, 1) ) +} + + +# _________________________________________________________________________________________________ +#' @title Check List Elements +#' +#' @description Tests if list elements are defined and reports the value or issues a warning. +#' +#' @param param_list A list containing variables to be checked. Default: `NULL`. +#' @param elements A character vector of element names in `param_list` to check. +#' Default: `character(0)`. +#' +#' @return A message for each element that is defined, and a warning for elements that are not. +#' @examples +#' param_list <- list(a = 1, b = NULL) +#' elements <- c("a", "b", "c") +#' .checkListElements(param_list, elements) +.checkListElements <- function(param_list = NULL, elements = character(0)) { + stopifnot( + is.list(param_list), + is.character(elements) + ) + + sapply(elements, function(element) { + if (is.null(param_list[[element]])) { + warning(sprintf("`%s` is not defined", element), immediate. = TRUE, call. = FALSE) + } else { + message(sprintf("`%s` is: %s", element, param_list[[element]]) ) + } + }, USE.NAMES = FALSE) + + invisible(NULL) +} + + +# _________________________________________________________________________________________________ +#' @title Get number of scaled features +#' +#' @param obj A Seurat object containing scaled data in `obj@assays$RNA@scale.data`. +#' @param assay The name of the assay to search for scaled data. Default: `RNA`. +#' @param v Verbose? Default: `TRUE`. +#' +#' @return Integer representing the number of scaled features +.getNrScaledFeatures <- function(obj, assay = Seurat::DefaultAssay(obj), + v = TRUE) { + res <- NA + if (v) message(" > Running .getNrScaledFeatures...") + if (v) message("Seurat version: ", obj@version, " | Assay searched: ", assay) + + + if (obj@version >= "5") { # Check if Seurat version is 5 or higher + if ("scale.data" %in% names(obj@assays[[assay]]@layers)) { + res <- nrow(obj@assays[[assay]]@layers[["scale.data"]]) + } else { + if (v) warning("No scaled data found in object.", immediate. = TRUE) + } + } else { # For Seurat versions below 5 + if ("scale.data" %in% names(obj@assays[[assay]])) { + res <- nrow(obj@assays[[assay]][["scale.data"]]) + } else { + if (v) warning("No scaled data found in object.", immediate. = TRUE) + } + } + return(res) +} + + + +# _________________________________________________________________________________________________ +#' @title Get number of principal components +#' +#' @param obj A Seurat object containing PCA cell embeddings in `reductions$pca@cell.embeddings` +#' @param v Verbose? Default: `TRUE`. +#' @return Integer representing the number of principal components +#' +.getNrPCs <- function(obj, v = TRUE, reduc = "pca") { + if("pca" %in% names(obj@reductions)) { + ncol(obj@reductions[[reduc]]@"cell.embeddings") + } else { + if(v) warning("No PCA cell embeddings found in object.", immediate. = TRUE) + NA + } +} + +# _________________________________________________________________________________________________ +#' @title Parse regression variables for name tagging +#' +#' @description This function extracts the regression variables from the `@commands` slot of a Seurat object. +#' If no regression variables are found, a message is printed. +#' @param obj A Seurat object +#' @param assay The name of the assay to search for scaled data. Default: `DefaultAssay()`. +#' @param v Verbose? Default: `TRUE`. +#' +#' @return Integer representing the number of principal components +.getRegressionVariablesForScaleData <- function(obj, assay = Seurat::DefaultAssay(obj), v = TRUE, ...) { + if (v) message(" > Running .getRegressionVariablesForScaleData...") + + # Input assertions + stopifnot(is(obj, "Seurat"), is.character(assay)) + + # Check if the "commands" slot exists in the object + if (!"commands" %in% slotNames(obj)) { + if (v) warning("No commands slot found in object.", immediate. = TRUE) + return(NULL) + } + + # Find the ScaleData command using the helper function + func_slot <- .FindCommandInObject(obj, pattern = paste0("^ScaleData.", assay)) + + if (is.null(func_slot)) { + if (v) message("No ScaleData command found in @commands.") + return(NULL) + } + + # Extract regression variables + regressionVariables <- func_slot$'vars.to.regress' + if (is.null(regressionVariables)) { + if (v) message("No regression variables found in @commands") + } else { + if (v) message("Regression variables found in @commands: ", paste(regressionVariables, collapse = ", ")) + } + + return(regressionVariables) +} + + + + +# _________________________________________________________________________________________________ +#' @title Parse key parameters from an object and format as a string +#' +#' @description This function extracts the number of scaled features, the number of principal components, +#' and formats additional information including regression variables. +#' @param obj An object to extract information from. +#' @param regressionVariables A list or vector containing variables for regression. Default: NULL. +#' If NULL, the function will attempt to extract the variables from the `object@commands$ScaleData`. +#' @param nrVarFeatures You can provide this number manually. Default: NULL. +#' @param return.as.name If TRUE, returns the name of the object. Default: `FALSE`.. +#' @param assay The assay to extract scaled features from. Default: "RNA". +#' @param suffix A suffix string to add. +#' @param v Verbose? Default: `TRUE`. +#' @return A character string summarizing the key parameters. + +.parseKeyParams <- function(obj, + regressionVariables = NULL, + nrVarFeatures = NULL, + return.as.name = FALSE, + assay = Seurat::DefaultAssay(obj), + suffix = NULL, + v = T + ) { + + tictoc::tic('.parseKeyParams') + + if(v) message(" > Running .parseKeyParams...") + scaledFeatures <- .getNrScaledFeatures(obj, assay, v= F) + + if (is.null(regressionVariables)) regressionVariables <- .getRegressionVariablesForScaleData(obj = obj, assay = assay, v = F) + + if (!is.null(nrVarFeatures)) { + if (nrVarFeatures != scaledFeatures) { + warning("nrVarFeatures != scaledFeatures. Reporting nrVarFeatures: ", nrVarFeatures, immediate. = TRUE) + } + scaledFeatures <- nrVarFeatures + } # else use scaledFeatures + + pcs <- .getNrPCs(obj) + regressionInfo <- kppc(regressionVariables) + reg <- if (!is.null(regressionVariables)) paste0(regressionInfo, " regressed out") else "no regression" + if (return.as.name) { + reg <- ReplaceSpecialCharacters(RemoveWhitespaces(reg, replacement = ".")) + tag <- kpp(scaledFeatures, "ScaledFeatures", pcs, "PCs", reg, suffix) + } else { + tag <- paste0(scaledFeatures, " ScaledFeatures | ", pcs, " PCs | ", reg, " ", suffix) + } + tictoc::toc() + return(tag) +} + + +# _________________________________________________________________________________________________ +#' @title Find Command in Seurat Object by Partial Match +#' +#' @description +#' This function searches for commands in a list within a Seurat object using a partial match +#' (e.g., pattern matching) on the command names. It returns the content of the first match if only +#' one match is found. If multiple matches are found, it outputs the number of hits and their names. +#' +#' @param obj A Seurat object. **Default:** None. +#' @param pattern A character string representing the pattern to match command names. **Default:** None. +#' +#' @return If exactly one match is found, the function returns the content of the first match. If +#' multiple matches are found, it returns `NULL` after displaying the number of matches and their names. +#' +#' @examples +#' # Assuming 'combined.obj' is your Seurat object +#' result <- FindCommandInObject(combined.obj, "^FindVariable") +#' +#' @importFrom checkmate assert_class assert_character assert_string + +.FindCommandInObject <- function(obj, pattern, perl = TRUE) { + + command_names <- names(obj@commands) # Get all command names + + # Find matches using partial pattern matching + matches <- grep(pattern, command_names, value = TRUE, perl = perl) + + # Check the number of matches + if (length(matches) == 0) { + stop("No matching commands found.") + } else { + if (length(matches) > 1) { + # Multiple matches found, print the number of hits and their names + message(length(matches), " matches found: ", paste(matches, collapse = ", ")) + } + # Return the content of the last match + return(obj@commands[[matches[length(matches)]]]) + } +} + + + + +# _________________________________________________________________________________________________ +#' @title Parse basic obj stats +#' +#' @description Parse cell and feature count to a string. +#' @param obj An object to extract information from. +#' @return A character string summarizing the key parameters. +#' +.parseBasicObjStats <- function(obj, sep = " ", assay = DefaultAssay(obj), + simple = FALSE, suffix = NULL) { + n.cells <- format(length(Cells(obj)), big.mark = sep, scientific = FALSE) + n.feat <- format(length(Features(obj, assay = assay)), big.mark = sep, scientific = FALSE) + if (simple) { + return(paste(n.cells, "cells.", suffix)) + } else { + return(paste(n.cells, "cells,", n.feat, assay, "features.", suffix)) + } +} + + + +# _________________________________________________________________________________________________ +# New additions, categorized _____________________________ ------ +# _________________________________________________________________________________________________ + + + + + + + + + + + + + +# _________________________________________________________________________________________________ +# Temp _____________________________ ------ +# _________________________________________________________________________________________________ + diff --git a/R/Seurat.Utils.Visualization.R b/R/Seurat.Utils.Visualization.R index 605c7eb..8f3f37c 100644 --- a/R/Seurat.Utils.Visualization.R +++ b/R/Seurat.Utils.Visualization.R @@ -26,7 +26,7 @@ #' @param above.ribo Upper bound of ribosomal content. Default: `p$thr.hp.ribo`. #' @param below.nFeature_RNA Lower bound of RNA features. Default: `p$thr.lp.nFeature_RNA`. #' @param above.nFeature_RNA Upper bound of RNA features. Default: `p$thr.hp.nFeature_RNA`. -#' @param subdir Subdirectory within `parentdir` where plots will be stored. Default is generated using a call to `kpp()`. +#' @param subdir Subdirectory within `parentdir` where plots will be stored. Default: generated using a call to `kpp()`. #' @param transparency Point transparency on scatter plots. Default: 0.25. #' @param cex Size of points on scatter plots. Default: 0.75. #' @param theme.used A `ggplot2` theme for all plots. Default: `theme_bw(base_size = 18)`. @@ -36,8 +36,8 @@ #' \dontrun{ #' if (interactive()) { #' # !! Default arguments assume that `p` is a list of parameters, present in the global -#' environment, with elements `thr.lp.mito`, `thr.hp.mito`, `thr.lp.ribo`, `thr.hp.ribo`, -#' `thr.lp.nFeature_RNA`, and `thr.hp.nFeature_RNA`. +#' # environment, with elements `thr.lp.mito`, `thr.hp.mito`, `thr.lp.ribo`, `thr.hp.ribo`, +#' # `thr.lp.nFeature_RNA`, and `thr.hp.nFeature_RNA`. #' PlotFilters(ls.Seurat) #' } #' } @@ -73,11 +73,11 @@ PlotFilters <- function( cex = 0.75, theme.used = theme_bw(base_size = 18), LabelDistFromTop = 200 # for barplot_label -) { + ) { message("Expects a list of Seurat objects, `ls.obj` with names, and a list of parameters, `par.ls` with a defined structure.") # Create names based on the Seurat objects, catenating "dataset" and numbers 1:n - if(is.null(suffices)) { + if (is.null(suffices)) { suffices <- paste0("obj_", 1:length(ls.obj)) message("Provide suffixes unique to each dataset, ideally as names of the list of Seu objects!") } @@ -133,18 +133,20 @@ PlotFilters <- function( mm <- cbind(mm, filt.nFeature_RNA, filt.below.mito, filt.below.ribo) mm$colour.thr.nFeature <- cut(mm$"nFeature_RNA", - breaks = c(-Inf, above.nFeature_RNA, below.nFeature_RNA, Inf), - labels = c( - paste0("LQ (<", above.nFeature_RNA, ")"), - paste0("HQ (", above.nFeature_RNA, "< X <", below.nFeature_RNA, ")"), - paste0("Dbl/Outlier (>", below.nFeature_RNA, ")") - ) + breaks = c(-Inf, above.nFeature_RNA, below.nFeature_RNA, Inf), + labels = c( + paste0("LQ (<", above.nFeature_RNA, ")"), + paste0("HQ (", above.nFeature_RNA, "< X <", below.nFeature_RNA, ")"), + paste0("Dbl/Outlier (>", below.nFeature_RNA, ")") + ) ) A <- ggplot(data = mm, aes(x = nFeature_RNA, fill = colour.thr.nFeature)) + geom_histogram(binwidth = 100) + - ggtitle(paste("Cells between", above.nFeature_RNA, "and", below.nFeature_RNA, - " UMIs are selected \n(", pc_TRUE(filt.nFeature_RNA), ")")) + + ggtitle(paste( + "Cells between", above.nFeature_RNA, "and", below.nFeature_RNA, + " UMIs are selected \n(", pc_TRUE(filt.nFeature_RNA), ")" + )) + geom_vline(xintercept = below.nFeature_RNA) + geom_vline(xintercept = above.nFeature_RNA) + theme(legend.position = "top") @@ -204,8 +206,10 @@ PlotFilters <- function( # D plot_list <- list(A, B, C, D) - px <- cowplot::plot_grid(plotlist = plot_list, nrow = 2, ncol = 2, - labels = LETTERS[1:4], label_size = 20) + px <- cowplot::plot_grid( + plotlist = plot_list, nrow = 2, ncol = 2, + labels = LETTERS[1:4], label_size = 20 + ) fname <- kpps(OutDir, FixPlotName("Filtering.thresholds", suffices[i], filetype)) cowplot::save_plot(filename = fname, plot = px, base_height = 14, ncol = 1, nrow = 1) # Figure 2 @@ -277,8 +281,10 @@ scCalcPCAVarExplained <- function(obj = combined.obj) { # Determine percent of v #' @param sub Subtitle for the plot, typically including information about the number of cells and #' features analyzed. Default: A string generated from `obj` stating the number of cells and #' features. +#' @param caption A caption for the plot. Default: "hline at 1%". #' @param use.MarkdownReports Boolean indicating whether to use `MarkdownReports` for plotting. #' If `FALSE`, `ggExpress` is used. Default: `FALSE`. +#' @param ... Additional arguments to be passed to `ggExpress::qbarplot` or `MarkdownReports::wbarplot`. #' #' @return Generates a plot showing the percent of variation each PC accounts for. This function #' does not return a value but instead generates a plot directly. @@ -298,21 +304,21 @@ scCalcPCAVarExplained <- function(obj = combined.obj) { # Determine percent of v scPlotPCAvarExplained <- function(obj = combined.obj, plotname = "Variance Explained by Principal Components", sub = paste(ncol(obj), "cells, ", nrow(obj), "features."), - use.MarkdownReports = FALSE, - # caption = .parseKeyParams(obj, suffix = "| hline at 1%"), caption = "hline at 1%", + # caption = .parseKeyParams(obj, suffix = "| hline at 1%"), + use.MarkdownReports = FALSE, ...) { message(" > Running scPlotPCAvarExplained...") pct <- scCalcPCAVarExplained(obj) if (use.MarkdownReports) { - MarkdownReports::wbarplot(pct, xlab = "Principal Components", ylab = "% of variation explained") + MarkdownReports::wbarplot(pct, xlab = "Principal Components", ylab = "% of variation explained", ...) barplot_label(round(pct, digits = 2), barplotted_variable = pct, cex = .5) } else { ggExpress::qbarplot( vec = pct, plotname = plotname, subtitle = sub, xlab = "Principal Components", ylab = "% of variation explained", - w = 10, h = 5, hline = 1, caption = caption + w = 10, h = 5, hline = 1, caption = caption, ... ) } } @@ -333,9 +339,9 @@ scPlotPCAvarExplained <- function(obj = combined.obj, #' #' @param obj A Seurat object containing gene expression data. #' @param n.genes.barplot The number of top genes to be displayed in the final barplot, showing -#' their expression as a percentage of the total UMIs. Default is 25. +#' their expression as a percentage of the total UMIs. Default: 25. #' @param width.barplot The width of the barplot that visualizes the highest expressed genes. -#' Default is a quarter of `n.genes.barplot`. +#' Default: a quarter of `n.genes.barplot`. #' #' @return The same Seurat object passed as input, but with an additional list in the `@misc` slot #' named `'TotalReadFraction'` that contains the relative total expression of each gene as a @@ -371,24 +377,26 @@ PercentInTranscriptome <- function( Relative.of.Total.Gene.Expression <- relative.total.Expr * 100 qhistogram(Relative.of.Total.Gene.Expression, - logX = FALSE, logY = TRUE, - plotname = "Gene expression as fraction of all UMI's", - subtitle = "Percentage in RNA-counts", - xlab = "Percent in Transcriptome (total per gene)", - ylab = "Number of genes", - xlab.angle = 45, - w = 7, h = 5, - ...) + logX = FALSE, logY = TRUE, + plotname = "Gene expression as fraction of all UMI's", + subtitle = "Percentage in RNA-counts", + xlab = "Percent in Transcriptome (total per gene)", + ylab = "Number of genes", + xlab.angle = 45, + w = 7, h = 5, + ... + ) Highest.Expressed.Genes <- head(iround(100 * relative.total.Expr), n = n.genes.barplot) qbarplot(Highest.Expressed.Genes, - plotname = "Percentage of highest expressed genes", - subtitle = "Total, in RNA-counts", - xlab = "", - ylab = "Gene expression as percent of all UMI's", - xlab.angle = 45, - w = width.barplot, h = 5, - ...) + plotname = "Percentage of highest expressed genes", + subtitle = "Total, in RNA-counts", + xlab = "", + ylab = "Gene expression as percent of all UMI's", + xlab.angle = 45, + w = width.barplot, h = 5, + ... + ) message("!!! \nTotalReadFraction is now stored under combined.obj@misc$'TotalReadFraction'.") @@ -439,10 +447,10 @@ plotGeneExpressionInBackgroundHist <- function( (pname <- paste(gene, "and the", suffx, "transcript count distribution")) ggExpress::qhistogram(GEX.Counts.total, - vline = genes.expression, logX = TRUE, w = w, h = h, - subtitle = paste("It belong to the top", pc_TRUE(GEX.Counts.total > genes.expression), "of genes (black line). Mean expr:", mean.expr), - plotname = pname, xlab = "Total Transcripts in Dataset", ylab = "Number of Genes", - ... + vline = genes.expression, logX = TRUE, w = w, h = h, + subtitle = paste("It belong to the top", pc_TRUE(GEX.Counts.total > genes.expression), "of genes (black line). Mean expr:", mean.expr), + plotname = pname, xlab = "Total Transcripts in Dataset", ylab = "Number of Genes", + ... ) } @@ -459,19 +467,19 @@ plotGeneExpressionInBackgroundHist <- function( #' @param obj Seurat object to analyze; Default: `combined.obj`. #' @param genes Vector of gene names to include in the analysis; Default: c("MALAT1", "MT-CO1"). #' @param assay Assay to use from the Seurat object; Default: "RNA". -#' @param slot_ Data slot to use ('data' or 'counts'); Default: "data". +#' @param layerX Data slot to use ('data' or 'counts'); Default: "data". #' @param thr_expr Expression threshold for highlighting in the plot; Default: 10. #' @param suffix Additional text to append to the plot title; Default: NULL. #' @param prefix Additional text to prepend to the plot title; Default: NULL. #' @param xlab Label for the x-axis; Default: "log10(Summed UMI count @data)". -#' @param return_cells_passing If TRUE, returns count of cells exceeding the expression threshold; Default: TRUE. +#' @param return_cells_passing If TRUE, returns count of cells exceeding the expression threshold; Default: `TRUE`. #' @param clip_count_qtl_thr Quantile threshold for clipping if using count data; Default: 0.95. #' Needed for visualization (to avoid x axis compression). -#' @param log10_counts If TRUE, log10-transforms the COUNT expression values; Default: TRUE. -#' @param return_quantile If TRUE, returns cell count exceeding the quantile threshold; Default: FALSE. +#' @param log10_counts If TRUE, log10-transforms the COUNT expression values; Default: `TRUE`. +#' @param return_quantile If TRUE, returns cell count exceeding the quantile threshold; Default: `FALSE`. #' @param w Width of the plot in inches; Default: 9. #' @param h Height of the plot in inches; Default: 5. -#' @param show_plot If TRUE, displays the generated plot; Default: TRUE. +#' @param show_plot If TRUE, displays the generated plot; Default: `TRUE`. #' @param ... Additional arguments for customization. #' #' @return Depending on the parameters, can return a ggplot object, the number of cells passing @@ -514,8 +522,10 @@ plotGeneExprHistAcrossCells <- function( # Aggregate genes if necessary aggregate <- length(genes) > 1 - SummedExpressionPerCell <- colSums(LayerData(object = obj, assay = assay, - layer = layerX)[genes, , drop = F]) + SummedExpressionPerCell <- colSums(LayerData( + object = obj, assay = assay, + layer = layerX + )[genes, , drop = FALSE]) # Clip counts if necessary if (layerX == "counts") { @@ -535,23 +545,22 @@ plotGeneExprHistAcrossCells <- function( if (aggregate) { SUBT <- paste0(SUBT, "\n", length(genes), "genes summed up, e.g: ", kppc(head(genes))) TTL <- kppd(prefix, plotname[1], suffix) - } else { TTL <- trimws(paste(prefix, plotname[length(plotname)], paste(genes), suffix)) } # Create the plot pobj <- ggExpress::qhistogram(SummedExpressionPerCell, - plotname = TTL, - subtitle = SUBT, - caption = CPT, - prefix = prefix, - suffix = suffix, - vline = thr_expr[1], filtercol = -1, - xlab = xlab, - ylab = "# of cells", - w = w, h = h, - ... + plotname = TTL, + subtitle = SUBT, + caption = CPT, + prefix = prefix, + suffix = suffix, + vline = thr_expr[1], filtercol = -1, + xlab = xlab, + ylab = "# of cells", + w = w, h = h, + ... ) # draw additional vlines if needed @@ -583,15 +592,15 @@ plotGeneExprHistAcrossCells <- function( #' @param obj A Seurat object. Default: combined.obj. #' @param feature The feature to evaluate. #' @param ident The identity class to split the data by. Default: GetNamedClusteringRuns(obj)[1]. -#' @param box Logical value indicating whether to plot the boxplot. Default: TRUE. +#' @param box Logical value indicating whether to plot the boxplot. Default: `TRUE`. #' @param box.ident The identity class to split the data by for individual dots in the boxplot. #' Ident will be used for the boxes displayed (matching the barplot). Default: NULL. #' @param threshold The threshold value to evaluate the feature against. Default: 2. #' @param subset_ident The identity class to subset the data by. Default: NULL. #' @param subset_values The values of the identity class to keep in the subset. Default: NULL. -#' @param omit.na Logical value indicating whether to omit NA values. Default: TRUE. +#' @param omit.na Logical value indicating whether to omit NA values. Default: `TRUE`. #' @param assay The assay to use for feature extraction. Default: 'RNA'. -#' @param plot Logical value indicating whether to plot the results. Default: TRUE. +#' @param plot Logical value indicating whether to plot the results. Default: `TRUE`. #' @param ylab The label for the y-axis of the plot. Default: "% cells above threshold". #' @param ... Additional parameters to pass to the plotting function. #' @@ -599,12 +608,11 @@ plotGeneExprHistAcrossCells <- function( #' #' @examples #' \dontrun{ -#' PctCellsAboveX(obj = seurat_object, feature = 'GeneA', ident = 'CellType', threshold = 1.5) +#' PctCellsAboveX(obj = seurat_object, feature = "GeneA", ident = "CellType", threshold = 1.5) #' } - PctCellsAboveX <- function(obj = combined.obj, feature = "TOP2A", - ident = GetNamedClusteringRuns(obj = obj, v = F)[1], + ident = GetNamedClusteringRuns(obj = obj, v = FALSE)[1], threshold = 2, suffix = ppp(substitute(obj), ncol(obj), "thr", threshold), box = FALSE, @@ -612,19 +620,18 @@ PctCellsAboveX <- function(obj = combined.obj, subset_ident = NULL, subset_values = NULL, omit.na = TRUE, - assay = 'RNA', + assay = "RNA", plot = TRUE, caption = NULL, ylab = "% cells above threshold", # color = NULL, ...) { - stopifnot( is(obj, "Seurat"), feature %in% colnames(obj@meta.data) | feature %in% Features(obj, assay = assay), ident %in% colnames(obj@meta.data), is.null(subset_ident) | subset_ident %in% colnames(obj@meta.data), - is.null(subset_values) | subset_values %in% unique(obj@meta.data[ ,subset_ident]), + is.null(subset_values) | subset_values %in% unique(obj@meta.data[, subset_ident]), !box & is.null(ident.box) | box ) @@ -633,20 +640,19 @@ PctCellsAboveX <- function(obj = combined.obj, if (omit.na) ls_feat <- lapply(ls_feat, na.omit.strip) } - split_ident <- if(box) ident.box else ident + split_ident <- if (box) ident.box else ident ls_feat <- split(obj@meta.data[, feature], f = obj@meta.data[, split_ident]) if (omit.na) ls_feat <- lapply(ls_feat, na.omit.strip) # Calculate the percentage of cells above the threshold for each split_ident Fraction.of.Cells.Above.Threshold <- sapply(ls_feat, function(x) sum(x > threshold) / length(x)) - if(box) { - + if (box) { # Arrange ident.box to categories of ident ls.from_to <- lapply(split(obj@meta.data[, ident.box], f = obj@meta.data[, ident]), unique) from_to <- list.2.replicated.name.vec(ls.from_to) - stopifnot(all( names(from_to) %in% names(Fraction.of.Cells.Above.Threshold) )) + stopifnot(all(names(from_to) %in% names(Fraction.of.Cells.Above.Threshold))) from_to <- from_to[names(Fraction.of.Cells.Above.Threshold)] @@ -655,31 +661,36 @@ PctCellsAboveX <- function(obj = combined.obj, } - if(plot){ - if(is.null(caption)) caption <- pc_TRUE(is.na(Fraction.of.Cells.Above.Threshold), - suffix = "of idents yielded NA/NaN & exluded from plot.") + if (plot) { + if (is.null(caption)) { + caption <- pc_TRUE(is.na(Fraction.of.Cells.Above.Threshold), + suffix = "of idents yielded NA/NaN & exluded from plot." + ) + } TTL <- paste("Percentage of Cells Above Threshold for", feature) STL <- paste("Cells above threshold for", feature, "above", threshold) SFX <- ppp(feature, "by", ident, "thr", threshold, "subset_ident", subset_ident, suffix) Fraction.of.Cells.Above.Threshold <- na.omit.strip(Fraction.of.Cells.Above.Threshold) - if(box) { - - pobj <- qboxplot(ls.Fraction.of.Cells.Above.Threshold, - , plotname = TTL, subtitle = STL, caption = caption, suffix = SFX - , add = "dotplot", xlab.angle = 45 - , hide.legend = TRUE, - , ylab = ylab - # , xlab = ident - , ...) - - } else { "barplot" - pobj <- qbarplot(Fraction.of.Cells.Above.Threshold, label = percentage_formatter(Fraction.of.Cells.Above.Threshold), - plotname = TTL, subtitle = STL, caption = caption, suffix = SFX, - ylab = ylab - # , xlab = ident - , ...) + if (box) { + pobj <- qboxplot(ls.Fraction.of.Cells.Above.Threshold, , + plotname = TTL, subtitle = STL, caption = caption, suffix = SFX, + add = "dotplot", xlab.angle = 45, + hide.legend = TRUE, , + ylab = ylab + # , xlab = ident + , ... + ) + } else { + "barplot" + pobj <- qbarplot(Fraction.of.Cells.Above.Threshold, + label = percentage_formatter(Fraction.of.Cells.Above.Threshold), + plotname = TTL, subtitle = STL, caption = caption, suffix = SFX, + ylab = ylab + # , xlab = ident + , ... + ) } @@ -732,12 +743,12 @@ PctCellsExpressingGenes <- function(genes, obj, assay = "RNA", min.expr = 1, ident = NULL, max.idents = 100) { # Input assertions stopifnot( - is.character(genes) && length(genes) > 0, # genes must be a non-empty character vector - inherits(obj, "Seurat"), # obj must be a Seurat object - is.character(assay) && length(assay) == 1, # assay must be a single character string - is.numeric(min.expr) && length(min.expr) == 1, # min.expr must be a single numeric value - is.null(ident) || (is.character(ident) && length(ident) == 1), # ident must be NULL or a single character string - is.numeric(max.idents) && length(max.idents) == 1 && max.idents > 0 # max.idents must be a single positive numeric value + is.character(genes) && length(genes) > 0, # genes must be a non-empty character vector + inherits(obj, "Seurat"), # obj must be a Seurat object + is.character(assay) && length(assay) == 1, # assay must be a single character string + is.numeric(min.expr) && length(min.expr) == 1, # min.expr must be a single numeric value + is.null(ident) || (is.character(ident) && length(ident) == 1), # ident must be NULL or a single character string + is.numeric(max.idents) && length(max.idents) == 1 && max.idents > 0 # max.idents must be a single positive numeric value ) # Message parameters to console @@ -837,31 +848,35 @@ PctCellsExpressingGenes <- function(genes, obj, assay = "RNA", min.expr = 1, #' to the number in the smallest group. The plot's bars are grouped by one variable and filled by another. #' The function supports custom color palettes, drawing numerical values on bars, and saving the plot. #' -#' @param group.by The variable to group by for the bar plot. #' @param fill.by The variable to fill by for the bar plot. -#' @param downsample Logical indicating whether to downsample data to equalize group sizes. -#' @param min.nr.sampled.cells The minimal number of cells to sample from each identity class. Defaults to 200 cells. -#' @param dsample.to.repl.thr Logical indicating if sampling should be done with replacement. Defaults to FALSE. +#' @param group.by The variable to group by for the bar plot. +#' @param obj A Seurat object. #' @param plotname The title of the plot. +#' @param min.nr.sampled.cells The minimal number of cells to sample from each identity class. Defaults to 200 cells. +#' @param downsample Logical indicating whether to downsample data to equalize group sizes. +#' @param prefix Optional prefix for the plot title. #' @param suffix Optional suffix for the plot title. #' @param sub_title Optional subtitle for the plot. #' @param hlines Numeric vector specifying y-intercepts of horizontal lines to add to the plot. #' @param return_table Logical; if TRUE, returns a contingency table instead of plotting. +#' @param save_table Logical; if TRUE, saves the table behind the plot. #' @param save_plot Logical; if TRUE, saves the generated plot. +#' @param also.pdf Save plot in both png and pdf formats. #' @param seedNr Seed for random number generation to ensure reproducibility. -#' @param w Width of the plot in inches. -#' @param h Height of the plot in inches. #' @param draw_plot Logical; if FALSE, suppresses plotting (useful if only the table is desired). #' @param show_numbers Logical; if TRUE, adds count numbers on top of each bar in the plot. +#' @param min.pct Show % Labels above this threshold. Default = 0.05, or above 5 pct. +#' @param cex.pct Font size of pct labels. #' @param min_frequency Minimum fraction to display individually in the plot; smaller fractions #' are aggregated into an "Other" category. #' @param custom_col_palette Specifies whether to use a standard or custom color palette. #' @param color_scale Defines the color scale to use for the plot if a custom palette is selected. -#' @param also.pdf Save plot in both png and pdf formats. -#' @param min.pct Show % Labels above this threshold. Default = 0.05, or above 5 pct. -#' @param cex.pct Font size of pct labels. #' @param show.total.cells Show total cells #' @param cex.total Label size for total cells +#' @param xlab.angle Angle of x-axis labels. +#' @param show_plot Logical; if TRUE, shows the plot. +#' @param w Width of the plot in inches. Default: `NULL` +#' @param h Height of the plot in inches. Default: `6` #' @param ... Additional parameters passed to internally called functions. #' #' @return Depending on the value of `return_table`, either returns a ggplot object or a list @@ -895,8 +910,6 @@ scBarplot.CellFractions <- function( save_plot = TRUE, also.pdf = FALSE, seedNr = 1989, - w = NULL, - h = 6, draw_plot = TRUE, show_numbers = FALSE, min.pct = 0.05, @@ -909,8 +922,9 @@ scBarplot.CellFractions <- function( cex.total = 2, xlab.angle = 45, show_plot = TRUE, + w = NULL, + h = 6, ...) { - # Input assertions stopifnot( inherits(obj, "Seurat"), # obj must be a Seurat object @@ -925,7 +939,7 @@ scBarplot.CellFractions <- function( if (is.null(w)) { categ_X <- nr.unique(META[, group.by]) categ_Y <- nr.unique(META[, fill.by]) - w <- ceiling(max(6, categ_Y/4, categ_X/2)) + w <- ceiling(max(6, categ_Y / 4, categ_X / 2)) } set.seed(seedNr) @@ -1039,13 +1053,12 @@ scBarplot.CellFractions <- function( stopifnot("Length(custom_col_palette) should be >= nr. categories displayed." = length(custom_col_palette) >= n.categories) all_categs_have_a_col <- all(categories %in% names(custom_col_palette)) - if(all_categs_have_a_col) { + if (all_categs_have_a_col) { colz_manual <- custom_col_palette[categories] } else { colz_manual <- custom_col_palette[seq(categories)] } # end if all_categs_have_a_col pl <- pl + scale_fill_manual(values = colz_manual) - } else if (rnd_colors) { colz_manual <- sample(rainbow(n.categories)) pl <- pl + scale_fill_manual(values = colz_manual) @@ -1055,13 +1068,13 @@ scBarplot.CellFractions <- function( if (show_numbers) { pl <- pl + geom_text(aes(label = ..count..), - stat = "count", position = position_fill(vjust = 0.5) + stat = "count", position = position_fill(vjust = 0.5) ) } else { pl <- pl + geom_text( aes(label = ifelse((..count.. / tapply(..count.., ..x.., sum)[..x..]) >= min.pct, - scales::percent(..count.. / tapply(..count.., ..x.., sum)[..x..], accuracy = 1), - "" + scales::percent(..count.. / tapply(..count.., ..x.., sum)[..x..], accuracy = 1), + "" )), stat = "count", position = position_fill(vjust = 0.5), size = cex.pct @@ -1094,13 +1107,14 @@ scBarplot.CellFractions <- function( ) if (save_table) { - ReadWriter::write.simple.xlsx(CT_freq_sc, filename = sppp(FixPlotName(plotname), suffix, "fr.barplot") - # suffix = sppp(FixPlotName(plotname), "fr.barplot") + ReadWriter::write.simple.xlsx(CT_freq_sc, + filename = sppp(FixPlotName(plotname), suffix, "fr.barplot") + # suffix = sppp(FixPlotName(plotname), "fr.barplot") ) } # Return contingency table or plot based on return_table flag - if(show_plot) print(pl) + if (show_plot) print(pl) if (return_table) { return(CT_freq_sc) @@ -1121,12 +1135,12 @@ scBarplot.CellFractions <- function( #' @param obj Seurat object for analysis. Default: `combined.obj`. #' @param ident Cluster identity. Used to specify which clustering results to visualize. #' Default: First entry from ordered clustering runs. -#' @param sort If TRUE, sorts clusters by size. Default: FALSE. +#' @param sort If TRUE, sorts clusters by size. Default: `FALSE`. #' @param title Title for the plot. Default: "Cells per Identity Group". #' @param sub Subtitle for the plot. Default: "identity". -#' @param label If TRUE, shows cell count or percentage based on the label vector. Default: TRUE. +#' @param label If TRUE, shows cell count or percentage based on the label vector. Default: `TRUE`. #' @param palette Color palette for the barplot. Default: 'glasbey'. -#' @param return_table If TRUE, returns the data used for plotting instead of the plot itself. Default: FALSE. +#' @param return_table If TRUE, returns the data used for plotting instead of the plot itself. Default: `FALSE`. #' @param min.cells Minimum cell count threshold for categories. Adjusted by data size. #' @param suffix Optional suffix for file naming. Used in conjunction with `kpp`. #' @param ylab_adj Adjustment factor for y-axis label positioning. Default: 1.1. @@ -1154,10 +1168,9 @@ scBarplot.CellsPerCluster <- function( ylab_adj = 1.1, min.cells = round(ncol(obj) / 100), ...) { - stopifnot( inherits(obj, "Seurat"), is.character(ident), is.logical(sort), is.character(plotname), is.character(sub), - is.logical(label), is.character(suffix) | is.null(suffix), is.character(palette), is.logical(return_table), + is.logical(label), is.character(suffix) | is.null(suffix), is.character(palette), is.logical(return_table), is.numeric(ylab_adj), is.numeric(min.cells), ident %in% colnames(obj@meta.data) ) @@ -1181,22 +1194,23 @@ scBarplot.CellsPerCluster <- function( n.clusters <- length(cell.per.cluster) nr.cells.per.cl <- table(obj[[ident]][, 1]) - SBT <- pc_TRUE(nr.cells.per.cl < min.cells, NumberAndPC = TRUE, - suffix = paste("of identites are below:", min.cells, "cells, or", percentage_formatter(min.PCT.cells), "of all cells." ) + SBT <- pc_TRUE(nr.cells.per.cl < min.cells, + NumberAndPC = TRUE, + suffix = paste("of identites are below:", min.cells, "cells, or", percentage_formatter(min.PCT.cells), "of all cells.") ) pl <- ggExpress::qbarplot(cell.per.cluster, - plotname = plotname, - subtitle = paste0(sub, "\n", SBT), - suffix = kpp(ident, ncol(obj),"c", suffix), - col = 1:n.clusters, - caption = .parseBasicObjStats(obj = obj), - xlab.angle = 45, - ylim = c(0, ylab_adj * max(cell.per.cluster)), - label = lbl, - ylab = "Cells", - palette_use = DiscretePaletteSafe(n = n.clusters, palette.used = palette), - ... + plotname = plotname, + subtitle = paste0(sub, "\n", SBT), + suffix = kpp(ident, ncol(obj), "c", suffix), + col = 1:n.clusters, + caption = .parseBasicObjStats(obj = obj), + xlab.angle = 45, + ylim = c(0, ylab_adj * max(cell.per.cluster)), + label = lbl, + ylab = "Cells", + palette_use = DiscretePaletteSafe(n = n.clusters, palette.used = palette), + ... ) if (return_table) { @@ -1216,7 +1230,7 @@ scBarplot.CellsPerCluster <- function( #' @param obj Seurat object for analysis. Default: `combined.obj`. #' @param ident Clustering identity to base the plot on. #' Default: The second entry from `GetClusteringRuns()`. -#' @param plot Whether to display the plot (TRUE) or return cluster sizes (FALSE). Default: TRUE. +#' @param plot Whether to display the plot (TRUE) or return cluster sizes (FALSE). Default: `TRUE`. #' @param thr.hist Threshold for switching from a bar plot to a histogram based on the number of #' clusters. Default: 30. #' @param ... Extra parameters for the plot. @@ -1250,8 +1264,8 @@ plotClustSizeDistr <- function( if (plot) { if (length(clust.size.distr) < thr.hist) { ggExpress::qbarplot(clust.size.distr, - plotname = ptitle, subtitle = psubtitle, - label = clust.size.distr, xlab = "Clusters", ylab = xlb, ... + plotname = ptitle, subtitle = psubtitle, + label = clust.size.distr, xlab = "Clusters", ylab = xlb, ... ) } else { ggExpress::qhistogram( @@ -1276,11 +1290,11 @@ plotClustSizeDistr <- function( #' @param thrX Threshold for calculating the fraction of cells. Default: 0.3. #' @param obj Seurat object with single-cell data. Default: `combined.obj`. #' @param id.col Cluster identity column in metadata. Default: 'cl.names.top.gene.res.0.3'. -#' @param return.df Whether to return the underlying data frame instead of the plot. Default: FALSE. +#' @param return.df Whether to return the underlying data frame instead of the plot. Default: `FALSE`. #' @param label Whether to add labels to the bar plot. Default: NULL. #' @param subtitle Optional subtitle for the plot. #' @param suffix Suffix for the output file name. -#' @param above Whether to calculate the fraction of cells above or below the threshold. Default: TRUE. +#' @param above Whether to calculate the fraction of cells above or below the threshold. Default: `TRUE`. #' @param ... Additional parameters for plotting functions. #' #' @examples @@ -1312,13 +1326,13 @@ scBarplot.FractionAboveThr <- function( dplyr::select(c(id.col, value.col)) (df_cells_above <- metacol |> - dplyr::group_by(!!sym(id.col)) |> - summarize( - n_cells = n(), - n_cells_above = sum(!!sym(value.col) > thrX), - fr_n_cells_above = n_cells_above / n_cells, - fr_n_cells_below = 1 - fr_n_cells_above - ) + dplyr::group_by(!!sym(id.col)) |> + summarize( + n_cells = n(), + n_cells_above = sum(!!sym(value.col) > thrX), + fr_n_cells_above = n_cells_above / n_cells, + fr_n_cells_below = 1 - fr_n_cells_above + ) ) @@ -1345,21 +1359,21 @@ scBarplot.FractionAboveThr <- function( pname <- paste("Pc. cells", tag, value.col, "of", thrX) ggobj <- ggExpress::qbarplot(v.fr_n_cells_above, - label = label, - plotname = pname, - filename = FixPlotName(kpp(pname, id.col, ".pdf")), - suffix = suffix, - subtitle = subtitle, - caption = paste( - "Overall average (black line):", iround(total_average), "% |", - substitute(obj) - ), - xlab.angle = 45, - xlab = "Clusters", - ylab = paste("% Cells", tag, "thr. (", value.col, ")"), - ylim = ylim, - hline = total_average, - ... + label = label, + plotname = pname, + filename = FixPlotName(kpp(pname, id.col, ".pdf")), + suffix = suffix, + subtitle = subtitle, + caption = paste( + "Overall average (black line):", iround(total_average), "% |", + substitute(obj) + ), + xlab.angle = 45, + xlab = "Clusters", + ylab = paste("% Cells", tag, "thr. (", value.col, ")"), + ylim = ylim, + hline = total_average, + ... ) print(ggobj) if (return.df) { @@ -1381,7 +1395,7 @@ scBarplot.FractionAboveThr <- function( #' @param value.col Metadata column with values for threshold comparison. Default: 'percent.ribo'. #' @param id.col Cluster identifier in metadata. Default: 'cl.names.top.gene.res.0.3'. #' @param obj Seurat object with cell data. Default: `combined.obj`. -#' @param return.df If TRUE, returns the data frame instead of the plot. Default: FALSE. +#' @param return.df If TRUE, returns the data frame instead of the plot. Default: `FALSE`. #' #' @examples #' \dontrun{ @@ -1434,11 +1448,10 @@ scBarplot.FractionBelowThr <- function( #' #' @examples #' \dontrun{ -#' scPieClusterDistribution(obj = combined.obj, ident = 'cluster_identity') +#' scPieClusterDistribution(obj = combined.obj, ident = "cluster_identity") #' } scPieClusterDistribution <- function(obj = combined.obj, ident = GetClusteringRuns(obj)[1], ...) { - # Input assertions stopifnot( is(obj, "Seurat"), is.character(ident), length(ident) == 1, @@ -1475,7 +1488,7 @@ scPieClusterDistribution <- function(obj = combined.obj, ident = GetClusteringRu #' @param plotname Title for the plot. Default: 'Nr.Cells.After.Filtering'. #' @param xlab.angle Angle for x-axis labels, enhancing readability. Default: 45. #' @param names Optionally provide custom names for x-axis labels. If FALSE, uses object names -#' from `ls.obj`. Default: FALSE. +#' from `ls.obj`. Default: `FALSE`. #' @param ... Extra parameters passed to `qbarplot`. #' #' @examples @@ -1499,12 +1512,12 @@ scBarplot.CellsPerObject <- function( cellCounts <- sapply(ls.obj, ncol) names(cellCounts) <- if (length(names) == length(ls.obj)) names else names(ls.obj) ggExpress::qbarplot(cellCounts, - plotname = plotname, - subtitle = paste(sum(cellCounts), "cells in total"), - label = cellCounts, - xlab.angle = xlab.angle, - ylab = "Cells", - ... + plotname = plotname, + subtitle = paste(sum(cellCounts), "cells in total"), + label = cellCounts, + xlab.angle = xlab.angle, + ylab = "Cells", + ... ) } @@ -1518,6 +1531,9 @@ scBarplot.CellsPerObject <- function( #' #' @param ls.obj List of Seurat objects. #' @param meta.col The metadata column name to be used for the barplot. +#' @param ... Additional arguments passed to `ggExpress::qbarplot.df`. + +#' #' @return A ggplot object representing the stacked barplot. #' #' @examples @@ -1558,9 +1574,9 @@ scBarplotStackedMetaCateg_List <- function( TTL <- paste(meta.col, "per object") p <- ggExpress::qbarplot.df(df, - plotname = TTL, - scale = TRUE, hide.legend = FALSE, - ... + plotname = TTL, + scale = TRUE, hide.legend = FALSE, + ... ) print(p) return(df) @@ -1604,15 +1620,13 @@ gg_color_hue <- function(n) { #' @param obj Seurat object, Default: combined.obj #' @param palette.used The name of the palette to use, Default: c("alphabet", "alphabet2", #' "glasbey", "polychrome", "stepped")[1] -#' @param show.colors Whether to display the colors in the palette, Default: FALSE +#' @param show.colors Whether to display the colors in the palette, Default: `FALSE`. #' @examples #' \dontrun{ #' if (interactive()) { #' getDiscretePalette() #' } #' } -#' @importFrom MarkdownHelpers color_check -#' @importFrom gplots rich.colors #' #' @export getDiscretePalette <- function() .Deprecated("DiscretePaletteSafe and DiscretePaletteObj") @@ -1628,7 +1642,7 @@ getDiscretePalette <- function() .Deprecated("DiscretePaletteSafe and DiscretePa #' @param obj Seurat object containing clustering information. #' @param palette.used The palette name to use for color generation. Options include "alphabet", #' "alphabet2", "glasbey", "polychrome", and "stepped". Default: "alphabet2". -#' @param show.colors If TRUE, displays the generated colors. Default: FALSE. +#' @param show.colors If TRUE, displays the generated colors. Default: `FALSE`. #' @param seed Seed for random color generation, ensuring reproducibility. Default: 1989. #' #' @return A character vector of color values corresponding to the number of clusters. @@ -1678,7 +1692,7 @@ getDiscretePaletteObj <- function(ident.used, #' @param n Number of colors to generate. #' @param palette.used Palette name to use for generating colors. Options include "alphabet", #' "alphabet2", "glasbey", "polychrome", "stepped". Default: "alphabet2". -#' @param show.colors If TRUE, displays the generated color palette. Default: FALSE. +#' @param show.colors If TRUE, displays the generated color palette. Default: `FALSE`. #' @param seed Seed value for reproducibility, especially when random color generation is involved. #' Default: 1989. #' @@ -1738,12 +1752,12 @@ DiscretePaletteSafe <- function(n, #' #' @param obj Seurat object containing clustering information. #' @param use_new_palettes Logical indicating whether to use custom palettes defined in -#' `DiscretePalette` function. Default: TRUE. +#' `DiscretePalette` function. Default: `TRUE`. #' @param palette Name of the color palette to use if `use_new_palettes` is TRUE. #' Options: "alphabet", "alphabet2", "glasbey", "polychrome", "stepped". Default: "glasbey". #' @param ident Clustering identity to use for coloring. Retrieved from the first entry #' of `GetClusteringRuns()` by default. -#' @param show If TRUE, displays a plot showing the color mapping for each cluster. Default: TRUE. +#' @param show If TRUE, displays a plot showing the color mapping for each cluster. Default: `TRUE`. #' #' @examples #' \dontrun{ @@ -1790,10 +1804,10 @@ getClusterColors <- function( #' @param obj Seurat object from which to extract cluster colors. #' Default: `combined.obj`. #' @param plot.colors If TRUE, visually displays the color scheme. -#' Default: FALSE. +#' Default: `FALSE`. #' @param simple If TRUE, returns only the unique set of colors used. #' If FALSE, returns a named vector mapping cluster identities to colors. -#' Default: FALSE. +#' Default: `FALSE`. #' #' @examples #' \dontrun{ @@ -1877,23 +1891,25 @@ plotAndSaveHeatmaps <- function(results, path = getwd(), show_colnames = TRUE, rowname_column = 1, ...) { - stopifnot(is.list(results), is.character(file.prefix), is.character(path) ) + stopifnot(is.list(results), is.character(file.prefix), is.character(path)) for (mt in names(results)) { res <- results[[mt]] - stopifnot( !anyNA(res[[rowname_column]]), - !anyNaN(res[[rowname_column]]) + stopifnot( + !anyNA(res[[rowname_column]]), + !anyNaN(res[[rowname_column]]) ) # Generate heatmap plot - x <- ReadWriter::column.2.row.names(results[[mt]], rowname_column = rowname_column ) - pobj <- pheatmap::pheatmap(mat = x, - main = paste("Heatmap of", mt, "values"), - scale = "column", - cluster_rows = cluster_rows, - display_numbers = display_numbers, - show_rownames = show_rownames, - show_colnames = show_colnames + x <- ReadWriter::column.2.row.names(results[[mt]], rowname_column = rowname_column) + pobj <- pheatmap::pheatmap( + mat = x, + main = paste("Heatmap of", mt, "values"), + scale = "column", + cluster_rows = cluster_rows, + display_numbers = display_numbers, + show_rownames = show_rownames, + show_colnames = show_colnames ) # Construct file name @@ -1901,8 +1917,10 @@ plotAndSaveHeatmaps <- function(results, path = getwd(), file_path <- file.path(path, file_name) # Save plot - MarkdownReports::wplot_save_pheatmap(x = pobj, data = x, plotname = file_name, - png = TRUE, pdf = FALSE, ...) + MarkdownReports::wplot_save_pheatmap( + x = pobj, data = x, plotname = file_name, + png = TRUE, pdf = FALSE, ... + ) cat("Saved heatmap for", mt, "to", file_path, "\n") } # for } @@ -1974,20 +1992,22 @@ qFeatureScatter <- function( #' @param ident A character vector specifying the identities to be used in the plot. #' @param split.by A character string specifying the grouping variable for splitting the plot. #' @param colors A character vector specifying the colors to use for the plot. +#' @param clip.outliers A logical indicating whether to clip outliers. #' @param replace.na A logical indicating whether NA values should be replaced. #' @param pt.size The size of the individual datapoints in the plot. Set to 0 to get a clean violin plot. -#' @param sub Subtitle of the plot. Default is feature by ident. +#' @param sub Subtitle of the plot. Default: feature by ident. #' @param suffix An optional string to append to the title of the plot. #' @param suffix.2.title A logical indicating whether to append the suffix to the plot title. #' @param logY A logical indicating whether to use a logarithmic scale for the y-axis. #' @param hline A numeric or logical value; if numeric, the value where a horizontal line should be drawn. #' @param caption A character string or logical for the plot caption. If FALSE, no caption is displayed. -#' @param ylab Y-axis label. Default is "Expression". +#' @param ylab Y-axis label. Default: "Expression". #' @param ylimit A numeric vector specifying the limits of the y-axis. #' @param legend Show legend; Default: opposite of `label`. #' @param legend.pos Position of legend; Default: 'NULL'. #' @param legend.title Title of legend; Default: 'split.by'. #' @param show_plot A logical indicating whether to display the plot. +#' @param grid A logical indicating whether to display grid lines. #' @param w Width of the plot. #' @param h Height of the plot. #' @param ... Additional arguments passed to `VlnPlot`. @@ -2002,7 +2022,7 @@ qFeatureScatter <- function( qSeuViolin <- function( obj, feature = "nFeature_RNA", - ident = GetNamedClusteringRuns(obj = obj, v = F)[1], + ident = GetNamedClusteringRuns(obj = obj, v = FALSE)[1], split.by = NULL, colors = NULL, clip.outliers = TRUE, @@ -2038,11 +2058,11 @@ qSeuViolin <- function( feature %in% colnames(obj@meta.data) || feature %in% rownames(obj) ) - if(exists('idents')) warning("Use arg. ident instead of idents!\n", immediate. = TRUE) - if(exists('features')) warning("Use arg. feature instead of features!\n", immediate. = TRUE) + if (exists("idents")) warning("Use arg. ident instead of idents!\n", immediate. = TRUE) + if (exists("features")) warning("Use arg. feature instead of features!\n", immediate. = TRUE) split_col <- unlist(obj[[ident]]) - if(is.null(w)) w <- ceiling(length(unique(split_col))/6) + 6 + if (is.null(w)) w <- ceiling(length(unique(split_col)) / 6) + 6 message("Plot width: ", w) @@ -2052,7 +2072,7 @@ qSeuViolin <- function( as.character(feature) } subt <- paste(feature, "- by -", ident) - if(!is.null(sub)) subt <- paste0(subt, "\n", sub) + if (!is.null(sub)) subt <- paste0(subt, "\n", sub) if (replace.na) { warning("NA's are not, but zeros are displayed on the plot. Avoid replace.na when possible", immediate. = TRUE) @@ -2063,7 +2083,8 @@ qSeuViolin <- function( if (clip.outliers) { warning("Outliers are clipped at percentiles 0.5% and 99.5%", immediate. = TRUE) obj@meta.data[[feature]] <- CodeAndRoll2::clip.outliers.at.percentile( - x = obj@meta.data[[feature]], percentiles = c(.005, .995) ) + x = obj@meta.data[[feature]], percentiles = c(.005, .995) + ) } if (!is.null(colors)) { @@ -2073,13 +2094,15 @@ qSeuViolin <- function( stopifnot("colors cannot be uniquely split by ident. Set colors = NULL!" = length(colors) == length(unique(split_col))) } - p.obj <- Seurat::VlnPlot(object = obj, - features = feature, group.by = ident, - cols = colors, split.by = split.by, - pt.size = pt.size, ...) + + p.obj <- Seurat::VlnPlot( + object = obj, + features = feature, group.by = ident, + cols = colors, split.by = split.by, + pt.size = pt.size, ... + ) + theme(axis.title.x = element_blank()) + labs(y = ylab) + - ggtitle(label = ttl, subtitle = subt ) + ggtitle(label = ttl, subtitle = subt) if (!legend) p.obj <- p.obj + NoLegend() if (!is.null(legend.title)) p.obj <- p.obj + guides(fill = guide_legend(legend.title)) else NULL @@ -2120,19 +2143,19 @@ qSeuViolin <- function( #' @param splitby Column in the metadata to split the cells by; Default: NULL. #' @param prefix Prefix added before the filename; Default: NULL. #' @param suffix Suffix added to the end of the filename; Default: `sub`. -#' @param save.plot If TRUE, the plot is saved into a file; Default: TRUE. -#' @param PNG If TRUE, the file is saved as a .png; Default: TRUE. +#' @param save.plot If TRUE, the plot is saved into a file; Default: `TRUE`. +#' @param PNG If TRUE, the file is saved as a .png; Default: `TRUE`. #' @param h Height of the plot in inches; Default: 7. #' @param w Width of the plot in inches; Default: NULL. #' @param nr.cols Number of columns to combine multiple feature plots, ignored if `split.by` is not NULL; Default: NULL. #' @param assay Which assay to use ('RNA' or 'integrated'); Default: 'RNA'. -#' @param axes If TRUE, axes are shown on the plot; Default: FALSE. -#' @param aspect.ratio Ratio of height to width. If TRUE, the ratio is fixed at 0.6; Default: FALSE. -#' @param HGNC.lookup If TRUE, HGNC gene symbol lookup is performed; Default: TRUE. -#' @param make.uppercase If TRUE, feature names are converted to uppercase; Default: FALSE. +#' @param axes If TRUE, axes are shown on the plot; Default: `FALSE`. +#' @param aspect.ratio Ratio of height to width. If TRUE, the ratio is fixed at 0.6; Default: `FALSE`. +#' @param HGNC.lookup If TRUE, HGNC gene symbol lookup is performed; Default: `TRUE`. +#' @param make.uppercase If TRUE, feature names are converted to uppercase; Default: `FALSE`. #' @param qlow Lower quantile for the color scale; Default: 'q10'. #' @param qhigh Upper quantile for the color scale; Default: 'q90'. -#' @param check_for_2D If TRUE, checks if UMAP is 2 dimensional; Default: TRUE. +#' @param check_for_2D If TRUE, checks if UMAP is 2 dimensional; Default: `TRUE`. #' @param caption Adds a caption to the ggplot object; Default: dynamically generated from `obj`. #' @param ... Additional parameters to pass to the internally called functions. #' @@ -2189,12 +2212,12 @@ qUMAP <- function( DefaultAssay(obj) <- assay gg.obj <- Seurat::FeaturePlot(obj, - features = feature, - reduction = reduction, - min.cutoff = qlow, max.cutoff = qhigh, - ncol = nr.cols, - split.by = splitby, - ... + features = feature, + reduction = reduction, + min.cutoff = qlow, max.cutoff = qhigh, + ncol = nr.cols, + split.by = splitby, + ... ) + ggtitle(label = title, subtitle = sub) + if (!axes) NoAxes() else NULL @@ -2203,7 +2226,7 @@ qUMAP <- function( if (!isFALSE(caption)) gg.obj <- gg.obj + ggplot2::labs(caption = caption) if (save.plot) { - fname <- ww.FnP_parser(sppp(prefix, toupper(reduction), feature, assay, paste0(ncol(obj),"c"), suffix), if (PNG) "png" else "pdf") + fname <- ww.FnP_parser(sppp(prefix, toupper(reduction), feature, assay, paste0(ncol(obj), "c"), suffix), if (PNG) "png" else "pdf") try(save_plot(filename = fname, plot = gg.obj, base_height = h, base_width = w)) # , ncol = 1, nrow = 1 } return(gg.obj) @@ -2236,16 +2259,16 @@ qUMAP <- function( #' @param palette Color palette for generating cluster colors; Default: 'glasbey'. #' @param highlight.clusters Specific clusters to be highlighted; optional; Default: NULL. #' @param cells.highlight Specific cells to be highlighted; optional; Default: NULL. -#' @param label Show cluster labels; Default: TRUE. -#' @param repel Repel labels to avoid overlap; Default: TRUE. +#' @param label Show cluster labels; Default: `TRUE`. +#' @param repel Repel labels to avoid overlap; Default: `TRUE`. #' @param legend Show legend; Default: opposite of `label`. #' @param legend.pos Position of legend; Default: 'NULL'. -#' @param axes Show axes; Default: FALSE. -#' @param aspect.ratio Fixed aspect ratio for the plot; Default: TRUE. +#' @param axes Show axes; Default: `FALSE`. +#' @param aspect.ratio Fixed aspect ratio for the plot; Default: `TRUE`. #' @param MaxCategThrHP Maximum number of categories before simplification; Default: 200. -#' @param save.plot Save plot to file; Default: TRUE. -#' @param PNG Save as PNG (TRUE) or PDF (FALSE); Default: TRUE. -#' @param check_for_2D Ensure UMAP is 2D; Default: TRUE. +#' @param save.plot Save plot to file; Default: `TRUE`. +#' @param PNG Save as PNG (TRUE) or PDF (FALSE); Default: `TRUE`. +#' @param check_for_2D Ensure UMAP is 2D; Default: `TRUE`. #' @param ... Additional parameters for `DimPlot`. #' #' @examples @@ -2266,7 +2289,7 @@ clUMAP <- function( sub = NULL, prefix = NULL, suffix = make.names(sub), - caption = .parseBasicObjStats(obj, simple = TRUE), # try(.parseKeyParams(obj = obj), silent = T), + caption = .parseBasicObjStats(obj, simple = TRUE), # try(.parseKeyParams(obj = obj), silent = TRUE), reduction = "umap", splitby = NULL, label.cex = 7, h = 7, w = NULL, @@ -2298,7 +2321,7 @@ clUMAP <- function( tictoc::tic() if (is.null(ident)) { - ident <- GetNamedClusteringRuns(obj, v = F)[1] + ident <- GetNamedClusteringRuns(obj, v = FALSE)[1] message("Identity not provided. Plotting: ", ident) } @@ -2348,7 +2371,7 @@ clUMAP <- function( if (!missing(cells.highlight)) { highlight.these <- cells.highlight message("Highlighting ", length(highlight.these), " cells, e.g.: ", head(highlight.these)) - message("cols.highlight: ", cols.highlight ," | sizes.highlight: ", sizes.highlight) + message("cols.highlight: ", cols.highlight, " | sizes.highlight: ", sizes.highlight) } if (is.null(cols)) { @@ -2360,7 +2383,7 @@ clUMAP <- function( } } - # if (F) cols <- adjustcolor(cols, alpha.f = alpha) + # if (FALSE) cols <- adjustcolor(cols, alpha.f = alpha) if (!is.null(highlight.these)) { cols <- "lightgrey" @@ -2381,7 +2404,8 @@ clUMAP <- function( cols.highlight = cols.highlight, sizes.highlight = sizes.highlight, label = label, repel = repel, label.size = label.cex, - ...) + + ... + ) + ggtitle(label = title, subtitle = sub) + if (!legend) NoLegend() else NULL } @@ -2395,7 +2419,7 @@ clUMAP <- function( # Save plot ___________________________________________________________ if (save.plot) { - pname <- sppp(prefix, plotname, paste0(ncol(obj),"c"), suffix, sppp(highlight.clusters)) + pname <- sppp(prefix, plotname, paste0(ncol(obj), "c"), suffix, sppp(highlight.clusters)) fname <- ww.FnP_parser(pname, if (PNG) "png" else "pdf") try(save_plot(filename = fname, plot = gg.obj, base_height = h, base_width = w)) # , ncol = 1, nrow = 1 } @@ -2419,7 +2443,12 @@ clUMAP <- function( #' @param COI Vector of cluster IDs to highlight on the UMAP plot; #' Default: `c("0", "2", "4")`. #' @param ident Name of the metadata column containing cluster IDs; -#' Default: 'integrated_snn_res.0.3'. +#' Default: `GetClusteringRuns()[1]`. +#' @param h Height of the plot; Default: `7`. +#' @param w Width of the plot; Default: `5`. +#' @param show_plot Logical; if `TRUE`, the plot will be displayed in the RStudio viewer; +#' Default: `TRUE`. +#' @param ... Additional arguments to be passed to the `DimPlot` function.#' #' #' @return Saves a UMAP plot highlighting specified clusters to the current working directory. #' The function itself does not return an object within R. @@ -2441,25 +2470,27 @@ umapHiLightSel <- function(obj = combined.obj, COI = c("0", "2", "4"), ident = GetClusteringRuns()[1], h = 7, w = 5, - show_plot = T, + show_plot = TRUE, ...) { stopifnot(is(obj, "Seurat"), - "Ident no found the object!" =ident %in% colnames(obj@meta.data), - "Not all clusters in COI are found the object!" = all(COI %in% unique(obj@meta.data[[ident]])) + "Ident no found the object!" = ident %in% colnames(obj@meta.data), + "Not all clusters in COI are found the object!" = all(COI %in% unique(obj@meta.data[[ident]])) ) cellsSel <- getCellIDs.from.meta(ident = ident, ident_values = COI, obj = obj) pl <- Seurat::DimPlot(obj, - reduction = "umap", - group.by = ident, - label = TRUE, - cells.highlight = cellsSel, - ... + reduction = "umap", + group.by = ident, + label = TRUE, + cells.highlight = cellsSel, + ... ) - if(show_plot) print(pl) + if (show_plot) print(pl) - ggplot2::ggsave(filename = extPNG(kollapse("cells", COI, collapseby = ".")), - height = h, width = w) + ggplot2::ggsave( + filename = extPNG(kollapse("cells", COI, collapseby = ".")), + height = h, width = w + ) } @@ -2482,7 +2513,7 @@ umapHiLightSel <- function(obj = combined.obj, #' @export DimPlot.ClusterNames <- function( obj = combined.obj, - ident = GetNamedClusteringRuns(obj = obj, v = F)[1], + ident = GetNamedClusteringRuns(obj = obj, v = FALSE)[1], reduction = "umap", title = ident, ...) { @@ -2506,10 +2537,10 @@ DimPlot.ClusterNames <- function( #' @description Save multiple FeaturePlots, as jpeg, on A4 for each gene, which are stored as a list of gene names. #' @param list.of.genes List of gene names for which the plots are to be generated. No default. #' @param obj Seurat object, Default: combined.obj -#' @param subdir Should plots be saved in a sub-directory? Default: TRUE +#' @param subdir Should plots be saved in a sub-directory? Default: `TRUE`. #' @param foldername Folder name to save the generated plots. Default: The name of the list of genes. #' @param subtitle.from.names Should the subtitle be extracted from the names of the gene symbols, -#' eg: `c("Astrocytes" = "AQP4")` ? Default: TRUE +#' eg: `c("Astrocytes" = "AQP4")` ? Default: `TRUE`. #' @param plot.reduction Dimension reduction technique to use for plots. Default: 'umap' #' @param intersectionAssay The assay to intersect with, either 'RNA' or 'integrated'. Default: 'RNA' #' @param layout Layout orientation of the plot. Default: 'wide' @@ -2522,7 +2553,7 @@ DimPlot.ClusterNames <- function( #' @param prefix Prefix for the plot filenames. Default: NULL #' @param suffix Suffix for the plot filenames. Default: NULL #' @param background_col Background color of the plots. Default: "white" -#' @param saveGeneList Should the list of genes be saved? Default: FALSE +#' @param saveGeneList Should the list of genes be saved? Default: `FALSE`. #' @param w Width of the plot. Default: 8.27 #' @param h Height of the plot. Default: 11.69 #' @param scaling Scaling factor for plot size. Default: 1 @@ -2565,13 +2596,13 @@ multiFeaturePlot.A4 <- function( final.foldername <- FixPlotName(paste0(foldername, "-", plot.reduction, suffix)) if (subdir) create_set_SubDir(final.foldername, "/", verbose = FALSE) - if(is.null(names(list.of.genes))) subtitle.from.names <- FALSE + if (is.null(names(list.of.genes))) subtitle.from.names <- FALSE list.of.genes.found <- check.genes( list.of.genes = list.of.genes, obj = obj, assay.slot = intersectionAssay, makeuppercase = FALSE ) - if(subtitle.from.names) names(list.of.genes.found) <- as.character(flip_value2name(list.of.genes)[list.of.genes.found]) + if (subtitle.from.names) names(list.of.genes.found) <- as.character(flip_value2name(list.of.genes)[list.of.genes.found]) DefaultAssay(obj) <- intersectionAssay if (!is.null(cex.min)) cex <- max(cex.min, cex) @@ -2608,7 +2639,7 @@ multiFeaturePlot.A4 <- function( for (j in 1:length(plot.list)) { plot.list[[j]] <- plot.list[[j]] + NoLegend() + NoAxes() if (aspect.ratio) plot.list[[j]] <- plot.list[[j]] + ggplot2::coord_fixed(ratio = aspect.ratio) - if (subtitle.from.names) plot.list[[j]] <- plot.list[[j]] + ggplot2::ggtitle(label = genes[j], subtitle = names(genes)[j] ) + if (subtitle.from.names) plot.list[[j]] <- plot.list[[j]] + ggplot2::ggtitle(label = genes[j], subtitle = names(genes)[j]) } # browser() @@ -2693,8 +2724,10 @@ multiSingleClusterHighlightPlots.A4 <- function( message(" > Running multiSingleClusterHighlightPlots.A4...") NrCellsPerCluster <- sort(table(obj[[ident]]), decreasing = TRUE) - stopifnot("Some clusters too small (<20 cells). See: table(obj[[ident]]) | Try: removeResidualSmallClusters()" = - all(NrCellsPerCluster > 20)) + stopifnot( + "Some clusters too small (<20 cells). See: table(obj[[ident]]) | Try: removeResidualSmallClusters()" = + all(NrCellsPerCluster > 20) + ) tictoc::tic() ParentDir <- OutDir @@ -2808,7 +2841,7 @@ qClusteringUMAPS <- function( stopifnot("None of the idents found" = n.found > 0) message(kppws(n.found, " found of ", idents)) - if(n.found >5) { + if (n.found > 5) { idents.found <- idents.found[1:4] message("Only the first 4 idents will be plotted: ", idents.found) } @@ -2868,8 +2901,10 @@ qGeneExpressionUMAPS <- function( # Check that the features are in the object features.found <- intersect(features, c(colnames(obj@meta.data), rownames(obj))) n.found <- length(features.found) - stopifnot("None of the features found" = n.found > 1, - "Only 4 features are allowed" = n.found <5) + stopifnot( + "None of the features found" = n.found > 1, + "Only 4 features are allowed" = n.found < 5 + ) message(kppws(n.found, "found of", length(features), "features:", features)) @@ -2923,9 +2958,11 @@ plotQUMAPsInAFolder <- function(genes, obj = combined.obj, message(" > Running plotQUMAPsInAFolder...") # Input checks - stopifnot(is.character(genes), - is.null(foldername) || is.character(foldername), - is.character(plot.reduction)) + stopifnot( + is.character(genes), + is.null(foldername) || is.character(foldername), + is.character(plot.reduction) + ) ParentDir <- OutDir if (is.null(foldername)) foldername <- deparse(substitute(genes)) @@ -2990,7 +3027,7 @@ PlotTopGenesPerCluster <- function( ls.topMarkers <- splitbyitsnames(topX.markers) for (i in 1:length(ls.topMarkers)) { multiFeaturePlot.A4( - list.of.genes = ls.topMarkers[[i]], obj = obj, subdir = T, foldername = ppp('TopGenes.umaps'), + list.of.genes = ls.topMarkers[[i]], obj = obj, subdir = TRUE, foldername = ppp("TopGenes.umaps"), prefix = ppp("DEG.markers.res", cl_res, "cluster", names(ls.topMarkers)[i]) ) } @@ -3043,7 +3080,7 @@ qQC.plots.BrainOrg <- function( # Raise a warning if there are any NAs if (sum(na_counts) > 0) { warning(sprintf("There are %d NA values found\n", na_counts), - immediate. = TRUE + immediate. = TRUE ) } @@ -3071,7 +3108,7 @@ qQC.plots.BrainOrg <- function( #' #' @param obj Seurat object for visualization; Default: `combined.obj`. #' @param custom.genes Logical indicating whether to use a custom set of genes. -#' If FALSE, a predefined list of key brain organoid markers is used; Default: FALSE. +#' If FALSE, a predefined list of key brain organoid markers is used; Default: `FALSE`. #' @param suffix Suffix for the folder name where the plots are saved; Default: "". #' #' @examples @@ -3146,8 +3183,8 @@ qMarkerCheck.BrainOrg <- function(obj = combined.obj, custom.genes = FALSE, PlotTopGenes <- function(obj = combined.obj, n = 32, exp.slot = "expr.q99") { message("Using obj@misc$", exp.slot) stopifnot(inherits(obj, "Seurat"), - "Requires calling calc.q99.Expression.and.set.all.genes before. " = - exp.slot %in% names(obj@misc) + "Requires calling calc.q99.Expression.and.set.all.genes before. " = + exp.slot %in% names(obj@misc) ) Highest.Expressed.Genes <- names(head(sort(obj@misc[[exp.slot]], decreasing = TRUE), n = n)) @@ -3172,7 +3209,7 @@ PlotTopGenes <- function(obj = combined.obj, n = 32, exp.slot = "expr.q99") { #' @param dim Number of dimensions in the reduction to consider; Default: 2. #' @param reduction Dimension reduction technique to modify ('umap', 'tsne', or 'pca'); Default: 'umap'. #' @param flip Axis (or axes) to flip; can be 'x', 'y', or 'xy' to flip both; Default: "x". -#' @param FlipReductionBackupToo Boolean indicating whether to also flip coordinates in the backup slot; Default: TRUE. +#' @param FlipReductionBackupToo Boolean indicating whether to also flip coordinates in the backup slot; Default: `TRUE`. #' #' @examples #' \dontrun{ @@ -3217,12 +3254,12 @@ FlipReductionCoordinates <- function( #' Default: `combined.obj`. #' @param dim Dimension along which to order clusters (1 for the first dimension, typically horizontal); #' Default: 1. -#' @param swap If TRUE, reverses the ordering direction; Default: FALSE. +#' @param swap If TRUE, reverses the ordering direction; Default: `FALSE`. #' @param reduction Dimension reduction technique used for cluster positioning ('umap', 'tsne', or 'pca'); #' Default: 'umap'. #' @param ident Clustering resolution identifier used to fetch cluster labels from `obj` metadata; #' Default: 'integrated_snn_res.0.5'. -#' @param plot If TRUE, plots the UMAP with new cluster names; Default: TRUE. +#' @param plot If TRUE, plots the UMAP with new cluster names; Default: `TRUE`. #' #' @examples #' \dontrun{ @@ -3290,10 +3327,14 @@ AutoNumber.by.UMAP <- function(obj = combined.obj, #' @param lab A vector of gene symbols to label on the plot. #' @param suffix A string to append to the filename/title of the plot. #' @param title The title of the plot. -#' @param subtitle The subtitle of the plot. +#' @param suffix A string to append to the filename/title of the plot. +#' @param caption The first line of caption of the plot. +#' @param caption2 The second line of caption of the plot. #' @param x The x-axis, which is typically the average log2 fold change. #' @param y The y-axis, which is typically the adjusted p-value. #' @param selectLab A vector of gene symbols to select for labeling. +#' @param min.p The minimum p-value, to trim high values on the Y-axis. +#' @param max.l2fc The maximum log2 fold change, to trim high values on the X-axis. #' @param min.pct.cells The minimum percentage of cells in which a gene must be expressed to be included in the plot. #' @param pCutoffCol The column in the toptable that contains the p-value cutoff. #' @param pCutoff The p-value cutoff. @@ -3304,7 +3345,7 @@ AutoNumber.by.UMAP <- function(obj = combined.obj, #' @param min.p The minimum p-value, to trim high values on the Y-axis. #' @param h The height of the plot. #' @param w The width of the plot. -#' @param ... Pass any other parameter to the internally called functions (most of them should work). +#' @param ... Pass any other parameter to `EnhancedVolcano::EnhancedVolcano()`. #' #' @return A ggplot object. #' @@ -3317,76 +3358,91 @@ scEnhancedVolcano <- function( x = "avg_log2FC", y = "p_val_adj", lab = rownames(toptable), - suffix = NULL, title = paste("DGEA"), - caption = paste("Min. Fold Change in Input:", .estMinimumFC(toptable)), - caption2 = paste("min p_adj:", min.p, "(Y-axis values clipped at)"), selectLab = trail(lab, 10), min.p = 1e-50, max.l2fc = Inf, min.pct.cells = 0.1, pCutoffCol = "p_val_adj", - pCutoff = 1e-3, + pCutoff = 1e-3, FCcutoff = 1, + suffix = NULL, + caption = paste("Min. Fold Change in Input:", .estMinimumFC(toptable)), + caption2 = paste("min p_adj:", min.p, "(Y-axis values clipped at)"), count_stats = TRUE, - drawConnectors = T, max.overlaps = Inf, + drawConnectors = TRUE, max.overlaps = Inf, h = 9, w = h, ...) { # - message("\nMin. log2fc: ", FCcutoff, "\nMax. p-adj: ", pCutoff, - "\nMin. p-adj (trim high y-axis): ", min.p, - "\nMin. pct cells expressing: ", min.pct.cells) - stopifnot(nrow(toptable) >5) + message( + "\nMin. log2fc: ", FCcutoff, "\nMax. p-adj: ", pCutoff, + "\nMin. p-adj (trim high y-axis): ", min.p, + "\nMin. pct cells expressing: ", min.pct.cells + ) + stopifnot(nrow(toptable) > 5) # Filter min. cells expressing. toptable <- toptable |> dplyr::filter(pct.1 > min.pct.cells | pct.2 > min.pct.cells) # calculate true min pct cells expressing (maybe input prefiltered above thr. already). - min.pct.cells <- toptable |> select(pct.1, pct.2) |> as.matrix() |> rowMax() |> min() + min.pct.cells <- toptable |> + select(pct.1, pct.2) |> + as.matrix() |> + rowMax() |> + min() # Clip p-values. toptable[["p_val_adj"]] <- - clip.at.fixed.value(x = toptable[["p_val_adj"]], thr = min.p, high = F) + clip.at.fixed.value(x = toptable[["p_val_adj"]], thr = min.p, high = FALSE) # Clip log2FC. if (max.l2fc < Inf) { toptable[["avg_log2FC"]] <- - clip.at.fixed.value(x = toptable[["avg_log2FC"]], thr = -max.l2fc, high = F) + clip.at.fixed.value(x = toptable[["avg_log2FC"]], thr = -max.l2fc, high = FALSE) toptable[["avg_log2FC"]] <- - clip.at.fixed.value(x = toptable[["avg_log2FC"]], thr = max.l2fc, high = T) + clip.at.fixed.value(x = toptable[["avg_log2FC"]], thr = max.l2fc, high = TRUE) } # Add statistical information to the subtitle. if (count_stats) { - enr_stats <- unlist(countRelevantEnrichments(df = toptable, logfc_col = x, pval_col = y, - logfc_cutoff = FCcutoff, pval_cutoff = pCutoff)) - stat_info <- kppws("Genes", intermingle2vec(names(enr_stats), enr_stats),"(red)") - subtitle <- paste0(stat_info, "\n", - paste("Cutoffs: max.p_adj: ", pCutoff, " | min.log2FC: ", FCcutoff, - " | min.pct.cells: ", min.pct.cells)) + enr_stats <- unlist(countRelevantEnrichments( + df = toptable, logfc_col = x, pval_col = y, + logfc_cutoff = FCcutoff, pval_cutoff = pCutoff + )) + stat_info <- kppws("Genes", intermingle2vec(names(enr_stats), enr_stats), "(red)") + subtitle <- paste0( + stat_info, "\n", + paste( + "Cutoffs: max.p_adj: ", pCutoff, " | min.log2FC: ", FCcutoff, + " | min.pct.cells: ", min.pct.cells + ) + ) } caption <- paste0(caption, "\n", caption2) # Create an enhanced volcano plot. # try.dev.off(); pobj <- EnhancedVolcano::EnhancedVolcano( - toptable = toptable - , x = x, y = y - , title = title, subtitle = subtitle - , lab = lab, selectLab = selectLab - , caption = caption - , pCutoffCol = pCutoffCol - , pCutoff = pCutoff - , FCcutoff = FCcutoff - , drawConnectors=drawConnectors - , max.overlaps = max.overlaps - , ...) + toptable = toptable, + x = x, y = y, + title = title, subtitle = subtitle, + lab = lab, selectLab = selectLab, + caption = caption, + pCutoffCol = pCutoffCol, + pCutoff = pCutoff, + FCcutoff = FCcutoff, + drawConnectors = drawConnectors, + max.overlaps = max.overlaps, + ... + ) print(pobj) # Save the plot. - qqSave(ggobj = pobj, title = paste0("Volcano.", make.names(title), suffix), - h = h, w = w) + qqSave( + ggobj = pobj, title = paste0("Volcano.", make.names(title), suffix), + h = h, w = w + ) return(pobj) } @@ -3410,8 +3466,8 @@ scEnhancedVolcano <- function( .estMinimumFC <- function(df, col = "avg_log2FC") { lfc <- df[[col]] - lfc_enr <- min(lfc[lfc>0]) - lfc_depl <- abs(max(lfc[lfc<0])) + lfc_enr <- min(lfc[lfc > 0]) + lfc_depl <- abs(max(lfc[lfc < 0])) estim_min_l2fc <- min(lfc_enr, lfc_depl) return(iround(2^estim_min_l2fc)) } @@ -3444,11 +3500,13 @@ countRelevantEnrichments <- function(df, pval_col = "p_val_adj", logfc_col = "avg_log2FC", pval_cutoff = 1e-2, logfc_cutoff = 1) { # - stopifnot(is.data.frame(df), - pval_col %in% colnames(df), - logfc_col %in% colnames(df), - is.numeric(pval_cutoff), - is.numeric(logfc_cutoff)) + stopifnot( + is.data.frame(df), + pval_col %in% colnames(df), + logfc_col %in% colnames(df), + is.numeric(pval_cutoff), + is.numeric(logfc_cutoff) + ) relevant_genes <- df |> dplyr::filter(!!sym(pval_col) <= pval_cutoff) @@ -3480,10 +3538,11 @@ countRelevantEnrichments <- function(df, #' @param pAdjustMethod Character. Method for p-value adjustment. Default: 'BH' for Benjamini-Hochberg. #' @param pvalueCutoff Numeric. P-value cutoff for significance. Default: 0.05. #' @param qvalueCutoff Numeric. Q-value cutoff for significance. Default: 0.2. -#' @param save Logical. Save the results as a data frame. Default: TRUE. +#' @param save Logical. Save the results as a data frame. Default: `TRUE`. #' @param suffix Character. Suffix to append to the output file name. Default: 'GO.Enrichments'. -#' @param check.gene.symbols Logical. Check gene symbols for validity. Default: TRUE. -#' @importFrom clusterProfiler enrichGO +#' @param check.gene.symbols Logical. Check gene symbols for validity. Default: `TRUE`. +#' @param ... Additional arguments to pass to `clusterProfiler::enrichGO`. + #' #' @return A data frame with GO enrichment results. #' @export @@ -3495,7 +3554,6 @@ countRelevantEnrichments <- function(df, #' go_results <- performGOEnrichment(gene_list, background_genes, "org.Hs.eg.db", "SYMBOL", "BP") #' print(go_results) #' } - scGOEnrichment <- function(genes, universe = NULL, org_db = "org.Hs.eg.db", key_type = "SYMBOL", ont = "BP", pAdjustMethod = "BH", pvalueCutoff = 0.05, qvalueCutoff = 0.2, @@ -3504,25 +3562,25 @@ scGOEnrichment <- function(genes, universe = NULL, check.gene.symbols = TRUE, ...) { # Load required library - if (!requireNamespace("clusterProfiler", quietly = TRUE)) { - stop("The 'clusterProfiler' package is required but not installed.") - } + stopifnot("Package 'clusterProfiler' must be installed to use this function." = require("clusterProfiler")) # Input assertions stopifnot( is.character(genes) | is.null(genes), (is.character(universe) | is.null(universe)), - (length(universe) > 100 | is.null(universe)), + (length(universe) > 100 | is.null(universe)), is.character(org_db), is.character(key_type), is.character(ont), is.character(ont) ) - if ( is.null(genes) | length(genes) == 0 ) return(NULL) + if (is.null(genes) | length(genes) == 0) { + return(NULL) + } # check.gene.symbols if (check.gene.symbols) { x <- checkGeneSymbols(genes, species = "human") - genes <- x[x[ , "Approved"] , 1] + genes <- x[x[, "Approved"], 1] } message("Performing enrichGO() analysis...") @@ -3537,15 +3595,20 @@ scGOEnrichment <- function(genes, universe = NULL, pvalueCutoff = pvalueCutoff, qvalueCutoff = qvalueCutoff, ont = ont, - ...) + ... + ) nr_of_enr_terms <- length(go_results@result$"ID") # Output assertions - if(nrow(go_results) < 1) warning("No enriched terms found!", immediate. = TRUE) + if (nrow(go_results) < 1) warning("No enriched terms found!", immediate. = TRUE) - if(save) xsave(go_results, suffix = kpp("enr", nr_of_enr_terms, suffix), - showMemObject = F, saveParams = F, allGenes = F) + if (save) { + xsave(go_results, + suffix = kpp("enr", nr_of_enr_terms, suffix), + showMemObject = FALSE, saveParams = FALSE, allGenes = FALSE + ) + } message("\nNr of enriched terms: ", nr_of_enr_terms) return(go_results) @@ -3566,10 +3629,12 @@ scGOEnrichment <- function(genes, universe = NULL, #' @param title Character. Title of the plot. Default: "GO Enrichment Analysis" followed by `tag`. #' @param subtitle Character. Subtitle of the plot. Default: NULL. #' @param caption Character. Caption of the plot. Default: constructed from input parameters. -#' @param save Logical. Whether to save the plot to a file. Default: TRUE. +#' @param save Logical. Whether to save the plot to a file. Default: `TRUE`. +#' @param h Height of the plot canvas, calculated as the height of an A4 page times `scale`; Default: `8.27 * scale`. +#' @param w Width of the plot canvas, calculated as the width of an A4 page times `scale`; Default: `11.69 * scale`. +#' @param also.pdf Save plot in both png and pdf formats. #' @param ... Additional arguments passed to `enrichplot::barplot.enrichResult`. #' -#' @import enrichplot #' @importFrom ggplot2 labs #' #' @return None. The function prints the plot and optionally saves it. @@ -3580,33 +3645,42 @@ scGOEnrichment <- function(genes, universe = NULL, #' df.enrichment <- data.frame() # Example enrichment results data frame #' plotGOEnrichment(df.enrichment) #' } - scBarplotEnrichr <- function(df.enrichment, tag = "...", universe = NULL, title = paste("GO Enriched Terms", tag), subtitle = kppws("Input: ", substitute(df.enrichment)), - caption = paste0("Input genes: ", length(df.enrichment@'gene'), - " | Enriched terms: ", nrow(df.enrichment), - " | background genes: ", length(universe) ), + caption = paste0( + "Input genes: ", length(df.enrichment@"gene"), + " | Enriched terms: ", nrow(df.enrichment), + " | background genes: ", length(universe) + ), save = TRUE, w = 10, h = 10, - also.pdf = F, + also.pdf = FALSE, ...) { + stopifnot("Package 'enrichplot' must be installed to use this function." = require("enrichplot")) - if(tag == "...") warning("Please provide a tag describing where are the enrichments.", immediate. = TRUE) - nr_input_genes <- length(df.enrichment@'gene') + if (tag == "...") warning("Please provide a tag describing where are the enrichments.", immediate. = TRUE) + nr_input_genes <- length(df.enrichment@"gene") pobj <- - if(nrow(df.enrichment) < 1 | is.null (df.enrichment)) { + if (nrow(df.enrichment) < 1 | is.null(df.enrichment)) { warning("No enriched terms input!", immediate. = TRUE) - ggplot() + theme_void() + annotate("text", x = 1, y = 1, label = "NO ENRICHMENT", - size = 8, color = "red", hjust = 0.5, vjust = 0.5) + ggplot() + + theme_void() + + annotate("text", + x = 1, y = 1, label = "NO ENRICHMENT", + size = 8, color = "red", hjust = 0.5, vjust = 0.5 + ) } else if (nr_input_genes < 5) { warning("Very few inputs for GOENR", immediate. = TRUE) - ggplot() + theme_void() + annotate("text", x = 1, y = 1, label = "TOO FEW GENES (<5)", - size = 8, color = "red", hjust = 0.5, vjust = 0.5) - + ggplot() + + theme_void() + + annotate("text", + x = 1, y = 1, label = "TOO FEW GENES (<5)", + size = 8, color = "red", hjust = 0.5, vjust = 0.5 + ) } else { enrichplot:::barplot.enrichResult(df.enrichment, showCategory = 20) } @@ -3646,7 +3720,7 @@ scBarplotEnrichr <- function(df.enrichment, filterGoEnrichment <- function(df.enrichments, pvalueCutoff = NULL, qvalueCutoff = NULL, - colname = 'Description') { + colname = "Description") { # Input assertions stopifnot( "enrichResult" %in% class(df.enrichments), @@ -3655,11 +3729,13 @@ filterGoEnrichment <- function(df.enrichments, !is.null(df.enrichments@qvalueCutoff) ) - pvalueCutoff <- if(is.null(pvalueCutoff)) df.enrichments@pvalueCutoff else pvalueCutoff - qvalueCutoff <- if(is.null(qvalueCutoff)) df.enrichments@qvalueCutoff else qvalueCutoff + pvalueCutoff <- if (is.null(pvalueCutoff)) df.enrichments@pvalueCutoff else pvalueCutoff + qvalueCutoff <- if (is.null(qvalueCutoff)) df.enrichments@qvalueCutoff else qvalueCutoff - message(paste("Filtering GO enrichment results with \np-value cutoff", - pvalueCutoff, "and q-value cutoff", qvalueCutoff)) + message(paste( + "Filtering GO enrichment results with \np-value cutoff", + pvalueCutoff, "and q-value cutoff", qvalueCutoff + )) # Filter and retrieve GO descriptions <- df.enrichments@result |> @@ -3716,17 +3792,17 @@ filterGoEnrichment <- function(df.enrichments, countEnrichedDepletedGenes <- function(df, min_padj = 0.01, min_logFC = 0.5, # genes = rownames(df), - colname.p = 'p_val_adj', colname.lFC = 'avg_log2FC') { - - stopifnot(min_padj > 0, - min_logFC > 0, - colname.p %in% colnames(df), - colname.lFC %in% colnames(df) + colname.p = "p_val_adj", colname.lFC = "avg_log2FC") { + stopifnot( + min_padj > 0, + min_logFC > 0, + colname.p %in% colnames(df), + colname.lFC %in% colnames(df) ) # Filter the dataframe for enriched genes idx.enr <- df[[colname.p]] <= min_padj & df[[colname.lFC]] >= min_logFC - enriched_genes <- df[ idx.enr, ] + enriched_genes <- df[idx.enr, ] enriched_symbols <- rownames(enriched_genes) # enriched_symbols <- genes[idx.enr] @@ -3738,13 +3814,15 @@ countEnrichedDepletedGenes <- function(df, min_padj = 0.01, min_logFC = 0.5, # depleted_symbols <- genes[idx.depl] # Create the named numeric vectors - gene_counts <- c('Enriched' = nrow(enriched_genes), 'Depleted' = nrow(depleted_genes)) - print("gene_counts"); print(gene_counts) - parameters <- c('min_padj' = min_padj, 'min_logFC' = min_logFC) - print("parameters"); print(parameters) + gene_counts <- c("Enriched" = nrow(enriched_genes), "Depleted" = nrow(depleted_genes)) + print("gene_counts") + print(gene_counts) + parameters <- c("min_padj" = min_padj, "min_logFC" = min_logFC) + print("parameters") + print(parameters) # Create the list of gene symbols - gene_symbols <- list('Enriched' = enriched_symbols, 'Depleted' = depleted_symbols) + gene_symbols <- list("Enriched" = enriched_symbols, "Depleted" = depleted_symbols) # Return the results as a list result <- list(gene_counts, parameters, gene_symbols) @@ -3819,7 +3897,7 @@ countEnrichedDepletedGenes <- function(df, min_padj = 0.01, min_logFC = 0.5, #' #' @param plot_list A list containing ggplot objects to be arranged and saved. #' @param pname Boolean indicating if the plot name should be automatically generated; -#' if FALSE, the name is based on `plot_list` and `suffix`; Default: FALSE. +#' if FALSE, the name is based on `plot_list` and `suffix`; Default: `FALSE`. #' @param suffix Suffix to be added to the generated filename if `pname` is FALSE; Default: NULL. #' @param scale Scaling factor for adjusting the plot size; Default: 1. #' @param nrow Number of rows in the plot arrangement; Default: 2. @@ -3866,7 +3944,7 @@ save2plots.A4 <- function( #' compact comparison of different visualizations or clustering results. #' #' @param plot_list A list containing ggplot objects to be arranged and saved; each object represents one panel. -#' @param pname Plot name; if FALSE, a name is generated automatically based on `plot_list` and `suffix`; Default: FALSE. +#' @param pname Plot name; if FALSE, a name is generated automatically based on `plot_list` and `suffix`; Default: `FALSE`. #' @param suffix Suffix to be added to the filename; Default: NULL. #' @param scale Scaling factor for adjusting the size of the overall plot canvas; Default: 1. #' @param nrow Number of rows to arrange the plots in; Default: 2. @@ -4034,14 +4112,14 @@ ww.check.quantile.cutoff.and.clip.outliers <- function(expr.vec = plotting.data[ plot3D.umap.gene <- function( gene = "TOP2A", obj = combined.obj, - annotate.by = GetNamedClusteringRuns(obj = obj, v = F)[1], + annotate.by = GetNamedClusteringRuns(obj = obj, v = FALSE)[1], quantileCutoff = .99, def.assay = c("integrated", "RNA")[2], suffix = NULL, alpha = .5, dotsize = 1.25, col.names = c("umap_1", "umap_2", "umap_3"), - assay = 'RNA', + assay = "RNA", ...) { # Input assertions ____________________________________ @@ -4132,13 +4210,12 @@ plot3D.umap.gene <- function( plot3D.umap <- function( obj = combined.obj, - category = GetNamedClusteringRuns(obj = obj, v = F)[1], + category = GetNamedClusteringRuns(obj = obj, v = FALSE)[1], annotate.by = category, suffix = NULL, dotsize = 1.25, col.names = c("umap_1", "umap_2", "umap_3"), ...) { - message("category: ", category) message("annotate.by: ", annotate.by) @@ -4411,10 +4488,10 @@ Plot3D.ListOfGenes <- function( #' @title Plot3D.ListOfCategories #' #' @description This function plots and saves a list of 3D UMAP or tSNE plots using plotly. -#' @param obj A Seurat object for which the plot is to be created. Default is 'combined.obj'. -#' @param annotate.by Character vector specifying the metadata column to be used for annotating the plot. Default is 'integrated_snn_res.0.7'. -#' @param cex Numeric value specifying the point size on the plot. Default is 1.25. -#' @param default.assay Character vector specifying the assay to be used. Default is 'RNA' (second element in the vector c("integrated", "RNA")). +#' @param obj A Seurat object for which the plot is to be created. Default: 'combined.obj'. +#' @param annotate.by Character vector specifying the metadata column to be used for annotating the plot. Default: 'integrated_snn_res.0.7'. +#' @param cex Numeric value specifying the point size on the plot. Default: 1.25. +#' @param default.assay Character vector specifying the assay to be used. Default: 'RNA' (second element in the vector c("integrated", "RNA")). #' @param ListOfCategories Character vector specifying the categories to be included in the plot. Default categories are "v.project", "experiment", "Phase", "integrated_snn_res.0.7". #' @param SubFolderName String specifying the name of the subfolder where the plots will be saved. By default, it's created using the function ppp("plot3D", substitute(ListOfCategories)). #' @examples @@ -4502,9 +4579,9 @@ panelCorPearson <- function(x, y, digits = 2, prefix = "", cex.cor = 2, method = test <- cor.test(x, y, method = method) Signif <- symnum(test$p.value, - corr = FALSE, na = FALSE, - cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), - symbols = c("***", "**", "*", ".", " ") + corr = FALSE, na = FALSE, + cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), + symbols = c("***", "**", "*", ".", " ") ) cex <- ifelse(missing(cex.cor), 0.8 / strwidth(txt), cex.cor) @@ -4522,13 +4599,13 @@ panelCorPearson <- function(x, y, digits = 2, prefix = "", cex.cor = 2, method = #' #' @param obj A single Seurat object. #' @param NrVarGenes A vector containing the top 20 variable genes for the Seurat object. -#' @param sampleName A string specifying the sample name, used to generate the filename for saving -#' the plot. -#' @param ppp A function for constructing the path and filename for saving the plot. It takes three -#' arguments: a prefix for the filename, a sample name, and the file extension ('pdf'). -#' @param repel A logical value indicating whether to repel the labels to avoid overlap. Default: TRUE. +#' @param repel A logical value indicating whether to repel the labels to avoid overlap. Default: `TRUE`. #' @param plotWidth Numeric value specifying the width of the plot when saved. Default: 7. #' @param plotHeight Numeric value specifying the height of the plot when saved. Default: 5. +#' @param save A logical value indicating whether to save the plot to a PDF file. Default: `TRUE`. +#' @param suffix A string suffix to append to the plot filename. Default: NULL. +#' @param assay The assay to use for the plot. Default: DefaultAssay(obj). +#' @param ... Additional arguments to pass to the Seurat::VariableFeaturePlot function. #' #' @examples #' \dontrun{ @@ -4551,11 +4628,13 @@ suPlotVariableFeatures <- function(obj = combined.obj, NrVarGenes = 15, obj.name <- deparse(substitute(obj)) - plot1 <- Seurat::VariableFeaturePlot(obj, assay = assay) + + plot1 <- Seurat::VariableFeaturePlot(obj, assay = assay, ...) + theme(panel.background = element_rect(fill = "white")) + - labs(title = "Variable Genes", - subtitle = kppws(obj.name, suffix), - caption = paste("Assay:", assay, "|", idate())) + labs( + title = "Variable Genes", + subtitle = kppws(obj.name, suffix), + caption = paste("Assay:", assay, "|", idate()) + ) # Assuming LabelPoints is defined elsewhere and available for use. @@ -4583,5 +4662,3 @@ suPlotVariableFeatures <- function(obj = combined.obj, NrVarGenes = 15, # Notes -------------------------------------------------------------------------------------------- # plotMetadataCategPie() is in Seurat.Utils.Metadata.R - - diff --git a/R/Seurat.Utils.Visualization.R.bac b/R/Seurat.Utils.Visualization.R.bac new file mode 100644 index 0000000..a1110cc --- /dev/null +++ b/R/Seurat.Utils.Visualization.R.bac @@ -0,0 +1,4612 @@ +# ____________________________________________________________________ +# Seurat.Utils.Visualization.R ---- +# ____________________________________________________________________ +# file.edit("~/GitHub/Packages/Seurat.utils/R/Seurat.Utils.Visualization.R") +# file.edit("~/GitHub/Packages/Seurat.utils/R/Seurat.utils.less.used.R") +# devtools::load_all("~/GitHub/Packages/Seurat.utils") +# devtools::document("~/GitHub/Packages/Seurat.utils"); devtools::load_all("~/GitHub/Packages/Seurat.utils") + + +# _________________________________________________________________________________________________ +#' @title Plot filtering thresholds and distributions +#' +#' @description This function plots the filtering thresholds and distributions for Seurat objects, +#' using four panels to highlight the relationship between gene- and UMI-counts, and the +#' ribosomal- and mitochondrial-content. !! Default arguments assume that `p` is a list of +#' parameters, present in the global environment, with elements `thr.lp.mito`, `thr.hp.mito`, +#' `thr.lp.ribo`, `thr.hp.ribo`, `thr.lp.nFeature_RNA`, and `thr.hp.nFeature_RNA`. +#' +#' @param ls.obj A list of Seurat objects to be analyzed. Default: `ls.Seurat`. +#' @param parentdir The parent directory where the plots will be stored. Default: `OutDirOrig`. +#' @param suffices Suffixes used in the output plot file names. Default: Names of the Seurat objects in `ls.obj`. +#' @param filetype The file type of the output plot images. Default: `'.png'`. +#' @param below.mito Lower bound of mitochondrial content. Default: `p$thr.lp.mito`. +#' @param above.mito Upper bound of mitochondrial content. Default: `p$thr.hp.mito`. +#' @param below.ribo Lower bound of ribosomal content. Default: `p$thr.lp.ribo`. +#' @param above.ribo Upper bound of ribosomal content. Default: `p$thr.hp.ribo`. +#' @param below.nFeature_RNA Lower bound of RNA features. Default: `p$thr.lp.nFeature_RNA`. +#' @param above.nFeature_RNA Upper bound of RNA features. Default: `p$thr.hp.nFeature_RNA`. +#' @param subdir Subdirectory within `parentdir` where plots will be stored. Default: generated using a call to `kpp()`. +#' @param transparency Point transparency on scatter plots. Default: 0.25. +#' @param cex Size of points on scatter plots. Default: 0.75. +#' @param theme.used A `ggplot2` theme for all plots. Default: `theme_bw(base_size = 18)`. +#' @param LabelDistFromTop Distance from top for label placement. Default: 200. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # !! Default arguments assume that `p` is a list of parameters, present in the global +#' environment, with elements `thr.lp.mito`, `thr.hp.mito`, `thr.lp.ribo`, `thr.hp.ribo`, +#' `thr.lp.nFeature_RNA`, and `thr.hp.nFeature_RNA`. +#' PlotFilters(ls.Seurat) +#' } +#' } +#' +#' @seealso \code{\link[ggplot2]{ggplot}}, \code{\link[ggplot2]{labs}}, +#' \code{\link[ggplot2]{geom_point}} +#' +#' @importFrom ggplot2 ggplot ggtitle geom_point +#' @importFrom Stringendo percentage_formatter +#' @importFrom MarkdownHelpers llprint +#' @importFrom cowplot plot_grid save_plot +#' +#' @export +PlotFilters <- function( + ls.obj = ls.Seurat, + par.ls = p, + parentdir = OutDirOrig, + suffices = names(ls.obj), + filetype = ".png", + below.mito = par.ls$"thr.lp.mito", + above.mito = par.ls$"thr.hp.mito", + below.ribo = par.ls$"thr.lp.ribo", + above.ribo = par.ls$"thr.hp.ribo", + below.nFeature_RNA = if ("quantile.thr.lp.nFeature_RNA" %in% names(par.ls)) par.ls$"quantile.thr.lp.nFeature_RNA" else par.ls$"thr.lp.nFeature_RNA", + above.nFeature_RNA = par.ls$"thr.hp.nFeature_RNA", + subdir = FixPlotName( + "Filtering.plots", + "mito", par.ls$"thr.hp.mito", par.ls$"thr.lp.mito", + "ribo", par.ls$"thr.hp.ribo", par.ls$"thr.lp.ribo", + "nFeature", below.nFeature_RNA, above.nFeature_RNA + ), + transparency = 0.25, + cex = 0.75, + theme.used = theme_bw(base_size = 18), + LabelDistFromTop = 200 # for barplot_label +) { + message("Expects a list of Seurat objects, `ls.obj` with names, and a list of parameters, `par.ls` with a defined structure.") + + # Create names based on the Seurat objects, catenating "dataset" and numbers 1:n + if(is.null(suffices)) { + suffices <- paste0("obj_", 1:length(ls.obj)) + message("Provide suffixes unique to each dataset, ideally as names of the list of Seu objects!") + } + + stopifnot( + is.list(ls.obj), is.list(par.ls) | is.null(par.ls), + is.numeric(above.nFeature_RNA), is.numeric(below.nFeature_RNA), + (below.nFeature_RNA > above.nFeature_RNA) | below.nFeature_RNA < 1, # either an absolute feature count or a quantile + is.numeric(above.mito), is.numeric(below.mito), below.mito > above.mito, + is.numeric(above.ribo), is.numeric(below.ribo), below.ribo > above.ribo, + is.character(parentdir), is.character(subdir), is.character(filetype), is.numeric(transparency), is.numeric(cex), + is.character(suffices), length(suffices) == length(ls.obj) + ) + + MarkdownHelpers::llprint( + "We filtered for high quality cells based on the number of genes detected [", above.nFeature_RNA, ";", below.nFeature_RNA, + "] and the fraction of mitochondrial [", percentage_formatter(above.mito), ";", percentage_formatter(below.mito), + "] and ribosomal [", percentage_formatter(above.ribo), ";", percentage_formatter(below.ribo), "] reads." + ) + + theme_set(theme.used) + OutDir <- FixPath(parentdir, subdir) + + print(subdir) + + MarkdownReports::create_set_OutDir(OutDir) + stopifnot(length(suffices) == length(ls.obj)) + + Calculate_nFeature_LowPass <- if (below.nFeature_RNA < 1) below.nFeature_RNA else FALSE + for (i in 1:length(ls.obj)) { + print(suffices[i]) + mm <- ls.obj[[i]]@meta.data + + if (Calculate_nFeature_LowPass < 1) { + below.nFeature_RNA <- floor(quantile(ls.obj[[i]]$"nFeature_RNA", probs = Calculate_nFeature_LowPass)) + iprint("below.nFeature_RNA at", percentage_formatter(Calculate_nFeature_LowPass), "percentile:", below.nFeature_RNA) + } + + AllMetaColumnsPresent <- all(c("nFeature_RNA", "percent.mito", "percent.ribo") %in% colnames(mm)) + if (!AllMetaColumnsPresent) { + print(c("nFeature_RNA", "percent.mito", "percent.ribo")) + print(c("nFeature_RNA", "percent.mito", "percent.ribo") %in% colnames(mm)) + print("Try to run:") + print('objX <- addMetaFraction(obj = objX, col.name = "percent.mito", gene.symbol.pattern = "^MT\\.|^MT-")') + print('objX <- addMetaFraction(obj = objX, col.name = "percent.ribo", gene.symbol.pattern = "^RPL|^RPS")') + stop() + } + + filt.nFeature_RNA <- (mm$"nFeature_RNA" < below.nFeature_RNA & mm$"nFeature_RNA" > above.nFeature_RNA) + filt.below.mito <- (mm$"percent.mito" < below.mito & mm$"percent.mito" > above.mito) + filt.below.ribo <- (mm$"percent.ribo" < below.ribo & mm$"percent.ribo" > above.ribo) + + mm <- cbind(mm, filt.nFeature_RNA, filt.below.mito, filt.below.ribo) + + mm$colour.thr.nFeature <- cut(mm$"nFeature_RNA", + breaks = c(-Inf, above.nFeature_RNA, below.nFeature_RNA, Inf), + labels = c( + paste0("LQ (<", above.nFeature_RNA, ")"), + paste0("HQ (", above.nFeature_RNA, "< X <", below.nFeature_RNA, ")"), + paste0("Dbl/Outlier (>", below.nFeature_RNA, ")") + ) + ) + + A <- ggplot(data = mm, aes(x = nFeature_RNA, fill = colour.thr.nFeature)) + + geom_histogram(binwidth = 100) + + ggtitle(paste("Cells between", above.nFeature_RNA, "and", below.nFeature_RNA, + " UMIs are selected \n(", pc_TRUE(filt.nFeature_RNA), ")")) + + geom_vline(xintercept = below.nFeature_RNA) + + geom_vline(xintercept = above.nFeature_RNA) + + theme(legend.position = "top") + # A + + B <- ggplot2::ggplot(mm, aes(x = nFeature_RNA, y = percent.mito)) + + ggplot2::ggtitle(paste( + "Cells below", percentage_formatter(below.mito), + "mito reads are selected \n(with A:", pc_TRUE(filt.nFeature_RNA & filt.below.mito), ")" + )) + + ggplot2::geom_point( + alpha = transparency, size = cex, show.legend = FALSE, + aes(color = filt.nFeature_RNA & filt.below.mito) + ) + + scale_x_log10() + # scale_y_log10() + + # annotation_logticks() + + geom_hline(yintercept = below.mito) + + geom_hline(yintercept = above.mito) + + geom_vline(xintercept = below.nFeature_RNA) + + geom_vline(xintercept = above.nFeature_RNA) + # B + + C <- ggplot(mm, aes(x = nFeature_RNA, y = percent.ribo)) + + ggtitle(paste( + "Cells below", percentage_formatter(below.ribo), + "ribo reads are selected \n(with A:", + pc_TRUE(filt.nFeature_RNA & filt.below.ribo), ")" + )) + + geom_point( + alpha = transparency, size = cex, show.legend = FALSE, + aes(color = filt.nFeature_RNA & filt.below.ribo) + ) + + scale_x_log10() + # scale_y_log10() + + annotation_logticks() + + geom_hline(yintercept = below.ribo) + + geom_hline(yintercept = above.ribo) + + geom_vline(xintercept = below.nFeature_RNA) + + geom_vline(xintercept = above.nFeature_RNA) + # C + + D <- ggplot(mm, aes(x = percent.ribo, y = percent.mito)) + + ggtitle(paste( + "Cells w/o extremes selected \n(with A,B,C:", + pc_TRUE(filt.nFeature_RNA & filt.below.mito & filt.below.ribo), ")" + )) + + geom_point( + alpha = transparency, size = cex, show.legend = FALSE, + aes(color = filt.nFeature_RNA & filt.below.mito & filt.below.ribo) + ) + + scale_x_log10() + + scale_y_log10() + + annotation_logticks() + + geom_hline(yintercept = below.mito) + + geom_hline(yintercept = above.mito) + + geom_vline(xintercept = below.ribo) + + geom_vline(xintercept = above.ribo) + # D + + plot_list <- list(A, B, C, D) + px <- cowplot::plot_grid(plotlist = plot_list, nrow = 2, ncol = 2, + labels = LETTERS[1:4], label_size = 20) + fname <- kpps(OutDir, FixPlotName("Filtering.thresholds", suffices[i], filetype)) + + cowplot::save_plot(filename = fname, plot = px, base_height = 14, ncol = 1, nrow = 1) # Figure 2 + stopifnot(file.exists(fname)) + } # for + # _________________________________________________________________________________________________ + create_set_OutDir(parentdir) +} + + +# _________________________________________________________________________________________________ +# plotting.statistics.and.QC.R ______________________________ ---- +# _________________________________________________________________________________________________ +# source('~/GitHub/Packages/Seurat.utils/Functions/Plotting.statistics.and.QC.R') +# try (source("https://raw.githubusercontent.com/vertesy/Seurat.utils/master/Functions/Plotting.statistics.and.QC.R")) + +# Source: self + web + +# Requirements __________________________________________ +# require(Seurat) +# require(ggplot2) +# tools for tools::toTitleCase + +# May also require +# try (source('/GitHub/Packages/CodeAndRoll/CodeAndRoll.R'),silent= FALSE) # generic utilities funtions +# require('MarkdownReports') # require("devtools") + +# _________________________________________________________________________________________________ +#' @title Calculate the percent of variation explained by individual PC's +#' +#' @description This function calculates the percentage of variation each principal component (PC) +#' accounts for in a Seurat object. It's specifically tailored for Seurat objects and provides a +#' convenient way to understand the variance distribution across PCs. For similar calculations on +#' standard PCA objects, refer to github.com/vertesy/Rocinante `PCA.percent.var.explained()`. +#' +#' @param obj A Seurat object from which to calculate the percentage of variation explained by each +#' PC. Default: `combined.obj`. +#' +#' @return A named vector with the percentage of variation explained by each principal component. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' data("combined.obj") # Example Seurat object +#' var_explained <- scCalcPCAVarExplained(combined.obj) +#' print(var_explained) +#' } +#' } +#' +#' @export +scCalcPCAVarExplained <- function(obj = combined.obj) { # Determine percent of variation associated with each PC. + pct <- obj@reductions$pca@stdev / sum(obj@reductions$pca@stdev) * 100 + names(pct) <- 1:length(obj@reductions$pca@stdev) + return(pct) +} + +# _________________________________________________________________________________________________ +#' @title Plot the percent of variation explained by individual PC's +#' +#' @description This function plots the percentage of variation explained by each principal +#' component (PC) in a Seurat object. It allows for a visual assessment of how much variance is +#' captured by each PC, which is crucial for dimensionality reduction analyses. Users can choose +#' between two plotting methods: one using `MarkdownReports` and the other using `ggExpress`. +#' +#' @param obj A Seurat object from which to plot the percentage of variation explained by each PC. +#' Default: `combined.obj`. +#' @param plotname The title of the plot to be generated. Default: "Variance Explained by Principal +#' Components". +#' @param sub Subtitle for the plot, typically including information about the number of cells and +#' features analyzed. Default: A string generated from `obj` stating the number of cells and +#' features. +#' @param caption A caption for the plot. Default: "hline at 1%". +#' @param use.MarkdownReports Boolean indicating whether to use `MarkdownReports` for plotting. +#' If `FALSE`, `ggExpress` is used. Default: `FALSE`. +#' @param ... Additional arguments to be passed to `ggExpress::qbarplot` or `MarkdownReports::wbarplot`. +#' +#' @return Generates a plot showing the percent of variation each PC accounts for. This function +#' does not return a value but instead generates a plot directly. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' data("combined.obj") # Example Seurat object +#' scPlotPCAvarExplained(combined.obj, use.MarkdownReports = TRUE) +#' } +#' } +#' +#' @importFrom MarkdownReports wbarplot +#' @importFrom ggExpress qbarplot +#' +#' @export +scPlotPCAvarExplained <- function(obj = combined.obj, + plotname = "Variance Explained by Principal Components", + sub = paste(ncol(obj), "cells, ", nrow(obj), "features."), + caption = "hline at 1%", + # caption = .parseKeyParams(obj, suffix = "| hline at 1%"), + use.MarkdownReports = FALSE, + ...) { + message(" > Running scPlotPCAvarExplained...") + + pct <- scCalcPCAVarExplained(obj) + if (use.MarkdownReports) { + MarkdownReports::wbarplot(pct, xlab = "Principal Components", ylab = "% of variation explained", ...) + barplot_label(round(pct, digits = 2), barplotted_variable = pct, cex = .5) + } else { + ggExpress::qbarplot( + vec = pct, plotname = plotname, subtitle = sub, + xlab = "Principal Components", ylab = "% of variation explained" + , w = 10, h = 5, hline = 1, caption = caption, ... + ) + } +} + + + +# _________________________________________________________________________________________________ +# Gene Expression Plots ______________________________ ---- +# _________________________________________________________________________________________________ + + +# _________________________________________________________________________________________________ +#' @title Gene Expression as Fraction of Total UMI Counts +#' +#' @description This function computes and visualizes gene expression levels as a fraction of total +#' UMI (Unique Molecular Identifier) counts across all genes in a Seurat object. It aims to highlight +#' the relative contribution of the most highly expressed genes to the overall transcriptome. +#' +#' @param obj A Seurat object containing gene expression data. +#' @param n.genes.barplot The number of top genes to be displayed in the final barplot, showing +#' their expression as a percentage of the total UMIs. Default: 25. +#' @param width.barplot The width of the barplot that visualizes the highest expressed genes. +#' Default: a quarter of `n.genes.barplot`. +#' +#' @return The same Seurat object passed as input, but with an additional list in the `@misc` slot +#' named `'TotalReadFraction'` that contains the relative total expression of each gene as a +#' fraction of total UMIs. +#' +#' @examples +#' \dontrun{ +#' combined.obj <- PercentInTranscriptome(combined.obj) +#' } +#' +#' @export +PercentInTranscriptome <- function( + obj = combined.obj, + assay = DefaultAssay(obj), + n.genes.barplot = 25, + width.barplot = round(n.genes.barplot / 4), + ...) { + # + message("Obj. version: ", obj@version) + message("assay: ", assay) + + m.expr <- if (obj@version < "5") { + obj@assays$RNA@counts + } else { + SeuratObject::GetAssayData(object = obj, assay = assay) + } + + + total.Expr <- sort(rowSums(m.expr), decreasing = TRUE) + relative.total.Expr <- total.Expr / sum(total.Expr) + print(head(iround(100 * relative.total.Expr), n = n.genes.barplot)) + + Relative.of.Total.Gene.Expression <- relative.total.Expr * 100 + + qhistogram(Relative.of.Total.Gene.Expression, + logX = FALSE, logY = TRUE, + plotname = "Gene expression as fraction of all UMI's", + subtitle = "Percentage in RNA-counts", + xlab = "Percent in Transcriptome (total per gene)", + ylab = "Number of genes", + xlab.angle = 45, + w = 7, h = 5, + ...) + + Highest.Expressed.Genes <- head(iround(100 * relative.total.Expr), n = n.genes.barplot) + qbarplot(Highest.Expressed.Genes, + plotname = "Percentage of highest expressed genes", + subtitle = "Total, in RNA-counts", + xlab = "", + ylab = "Gene expression as percent of all UMI's", + xlab.angle = 45, + w = width.barplot, h = 5, + ...) + + message("!!! \nTotalReadFraction is now stored under combined.obj@misc$'TotalReadFraction'.") + + obj@misc$"TotalReadFraction" <- relative.total.Expr + return(obj) +} + + + +# _________________________________________________________________________________________________ +#' @title Histogram All Genes' Expression Level and a Highlighted Gene +#' +#' @description Shows a comparison of the expression level of the chose gene to all genes. +#' Very useful to see if the gene has a meaningful expression level. This function generates a +#' histogram to visualize the expression level distribution of a specified gene across all cells in +#' a Seurat object. It highlights the position of the gene of interest within the overall distribution. +#' +#' @param gene The gene of interest for which the expression level distribution is to be plotted. +#' Default: 'TOP2A'. +#' @param obj A Seurat object containing the expression data. Default: The first Seurat object in `ls.Seurat`. +#' @param assay The assay from which to retrieve the expression data. Default: "RNA". +#' @param slot The slot in the Seurat object from which to retrieve the expression data. Options +#' include "counts" for raw counts and "data" for normalized (and possibly log-transformed) data. +#' Default: "data". +#' @param w The width of the plot. Default: 7. +#' @param h The height of the plot. Default: 4. +#' @param ... Any other parameter that can be passed to the internally called functions. +#' +#' @export +plotGeneExpressionInBackgroundHist <- function( + gene = "TOP2A", + obj = combined.obj, + assay = "RNA", + slot = c("counts", "data")[2], + w = 7, h = 4, + ...) { + message("gene: ", gene) + stopifnot(gene %in% rownames(obj)) + + + GEX.Counts <- GetAssayData(object = obj, assay = assay, slot = slot) + + GEX.Counts.total <- rowSums(GEX.Counts) + genes.expression <- GEX.Counts.total[gene] + mean.expr <- iround(mean(GEX.Counts[gene, ])) + + suffx <- if (slot == "counts") "raw" else "normalised, logtransformed" + (pname <- paste(gene, "and the", suffx, "transcript count distribution")) + + ggExpress::qhistogram(GEX.Counts.total, + vline = genes.expression, logX = TRUE, w = w, h = h, + subtitle = paste("It belong to the top", pc_TRUE(GEX.Counts.total > genes.expression), "of genes (black line). Mean expr:", mean.expr), + plotname = pname, xlab = "Total Transcripts in Dataset", ylab = "Number of Genes", + ... + ) +} + + + + +# _________________________________________________________________________________________________ +#' @title Histogram of Gene / Geneset Aggregate Expression Across Cells +#' +#' @description Creates and optionally saves a histogram showing expression levels of specified genes +#' within a Seurat object. Provides options for aggregate gene expression, expression threshold filtering, +#' and quantile clipping for count data. +#' +#' @param obj Seurat object to analyze; Default: `combined.obj`. +#' @param genes Vector of gene names to include in the analysis; Default: c("MALAT1", "MT-CO1"). +#' @param assay Assay to use from the Seurat object; Default: "RNA". +#' @param layerX Data slot to use ('data' or 'counts'); Default: "data". +#' @param thr_expr Expression threshold for highlighting in the plot; Default: 10. +#' @param suffix Additional text to append to the plot title; Default: NULL. +#' @param prefix Additional text to prepend to the plot title; Default: NULL. +#' @param xlab Label for the x-axis; Default: "log10(Summed UMI count @data)". +#' @param return_cells_passing If TRUE, returns count of cells exceeding the expression threshold; Default: `TRUE`.. +#' @param clip_count_qtl_thr Quantile threshold for clipping if using count data; Default: 0.95. +#' Needed for visualization (to avoid x axis compression). +#' @param log10_counts If TRUE, log10-transforms the COUNT expression values; Default: `TRUE`.. +#' @param return_quantile If TRUE, returns cell count exceeding the quantile threshold; Default: `FALSE`.. +#' @param w Width of the plot in inches; Default: 9. +#' @param h Height of the plot in inches; Default: 5. +#' @param show_plot If TRUE, displays the generated plot; Default: `TRUE`.. +#' @param ... Additional arguments for customization. +#' +#' @return Depending on the parameters, can return a ggplot object, the number of cells passing +#' the expression threshold, or the number of cells exceeding the quantile threshold. +#' +#' @examples +#' \dontrun{ +#' plotGeneExprHistAcrossCells(obj = yourSeuratObject, genes = c("GeneA", "GeneB")) +#' } +#' +#' @return Depending on the parameters, can return a ggplot object, the number of cells passing +#' the expression threshold, or the number of cells exceeding the quantile threshold. +#' +#' @export +#' @importFrom scales hue_pal +#' @importFrom Seurat GetAssayData +#' @importFrom ggplot2 geom_vline labs +#' @importFrom ggExpress qhistogram +plotGeneExprHistAcrossCells <- function( + genes = c("MALAT1", "MT-CO1", "MT-CO2", "MT-CYB", "TMSB4X", "KAZN"), + obj = combined.obj, + assay = "RNA", layerX = "data", + thr_expr = 10, + suffix = NULL, + prefix = NULL, + plotname = c("Summed Gene-set Expression -", "Expression of"), + xlab = paste0("Expression -log10(Summed UMIs @", layerX, ")"), + return_cells_passing = TRUE, + clip_count_qtl_thr = 0.99, + log10_counts = TRUE, + return_quantile, + w = 9, h = 5, + show_plot = TRUE, + ...) { + # + stopifnot( + length(genes) > 0, + layerX %in% c("data", "counts") + ) + + # Aggregate genes if necessary + aggregate <- length(genes) > 1 + SummedExpressionPerCell <- colSums(LayerData(object = obj, assay = assay, + layer = layerX)[genes, , drop = F]) + + # Clip counts if necessary + if (layerX == "counts") { + SummedExpressionPerCell <- CodeAndRoll2::clip.at.fixed.value( + x = SummedExpressionPerCell, + thr = quantile(SummedExpressionPerCell, probs = clip_count_qtl_thr) + ) + if (log10_counts) SummedExpressionPerCell <- log10(SummedExpressionPerCell + 1) + } + + # Create annotation + CPT <- paste("layer:", layerX, "| assay:", assay, "| cutoff at", iround(thr_expr)) + + # Add a subtitle with the number of genes and the expression threshold + SUBT <- filter_HP(SummedExpressionPerCell, threshold = thr_expr, return_conclusion = TRUE, plot.hist = FALSE) + + if (aggregate) { + SUBT <- paste0(SUBT, "\n", length(genes), "genes summed up, e.g: ", kppc(head(genes))) + TTL <- kppd(prefix, plotname[1], suffix) + + } else { + TTL <- trimws(paste(prefix, plotname[length(plotname)], paste(genes), suffix)) + } + + # Create the plot + pobj <- ggExpress::qhistogram(SummedExpressionPerCell, + plotname = TTL, + subtitle = SUBT, + caption = CPT, + prefix = prefix, + suffix = suffix, + vline = thr_expr[1], filtercol = -1, + xlab = xlab, + ylab = "# of cells", + w = w, h = h, + ... + ) + + # draw additional vlines if needed + if (length(thr_expr) > 1) { + pobj <- pobj + + ggplot2::geom_vline(xintercept = thr_expr[-1], col = 2, lty = 2, lwd = 1) + + ggplot2::labs(caption = "Red line marks original estimate") + ggExpress::qqSave(ggobj = pobj, title = sppp(TTL, "w.orig")) # , ext = '.png' + } + + + # Print the plot + if (show_plot) print(pobj) + + # Return the number of cells passing the filter + if (return_cells_passing) { + return(MarkdownHelpers::filter_HP(SummedExpressionPerCell, threshold = thr_expr, plot.hist = FALSE)) + } +} + + + +# _________________________________________________________________________________________________ +#' @title Percentage of Cells Above Threshold +#' +#' @description This function calculates the percentage of cells above a specified threshold for a given +#' feature in a Seurat object. It can subset the data based on a specified identity and values. +#' +#' @param obj A Seurat object. Default: combined.obj. +#' @param feature The feature to evaluate. +#' @param ident The identity class to split the data by. Default: GetNamedClusteringRuns(obj)[1]. +#' @param box Logical value indicating whether to plot the boxplot. Default: `TRUE`.. +#' @param box.ident The identity class to split the data by for individual dots in the boxplot. +#' Ident will be used for the boxes displayed (matching the barplot). Default: NULL. +#' @param threshold The threshold value to evaluate the feature against. Default: 2. +#' @param subset_ident The identity class to subset the data by. Default: NULL. +#' @param subset_values The values of the identity class to keep in the subset. Default: NULL. +#' @param omit.na Logical value indicating whether to omit NA values. Default: `TRUE`.. +#' @param assay The assay to use for feature extraction. Default: 'RNA'. +#' @param plot Logical value indicating whether to plot the results. Default: `TRUE`.. +#' @param ylab The label for the y-axis of the plot. Default: "% cells above threshold". +#' @param ... Additional parameters to pass to the plotting function. +#' +#' @return A named vector with the percentage of cells above the threshold for each identity class. +#' +#' @examples +#' \dontrun{ +#' PctCellsAboveX(obj = seurat_object, feature = 'GeneA', ident = 'CellType', threshold = 1.5) +#' } + +PctCellsAboveX <- function(obj = combined.obj, + feature = "TOP2A", + ident = GetNamedClusteringRuns(obj = obj, v = F)[1], + threshold = 2, + suffix = ppp(substitute(obj), ncol(obj), "thr", threshold), + box = FALSE, + ident.box = NULL, + subset_ident = NULL, + subset_values = NULL, + omit.na = TRUE, + assay = 'RNA', + plot = TRUE, + caption = NULL, + ylab = "% cells above threshold", + # color = NULL, + ...) { + + stopifnot( + is(obj, "Seurat"), + feature %in% colnames(obj@meta.data) | feature %in% Features(obj, assay = assay), + ident %in% colnames(obj@meta.data), + is.null(subset_ident) | subset_ident %in% colnames(obj@meta.data), + is.null(subset_values) | subset_values %in% unique(obj@meta.data[ ,subset_ident]), + !box & is.null(ident.box) | box + ) + + if (!is.null(subset_ident)) { + obj <- subsetSeuObjByIdent(obj, ident = subset_ident, identGroupKeep = subset_values) + if (omit.na) ls_feat <- lapply(ls_feat, na.omit.strip) + } + + split_ident <- if(box) ident.box else ident + ls_feat <- split(obj@meta.data[, feature], f = obj@meta.data[, split_ident]) + if (omit.na) ls_feat <- lapply(ls_feat, na.omit.strip) + + # Calculate the percentage of cells above the threshold for each split_ident + Fraction.of.Cells.Above.Threshold <- sapply(ls_feat, function(x) sum(x > threshold) / length(x)) + + if(box) { + + # Arrange ident.box to categories of ident + ls.from_to <- lapply(split(obj@meta.data[, ident.box], f = obj@meta.data[, ident]), unique) + from_to <- list.2.replicated.name.vec(ls.from_to) + + stopifnot(all( names(from_to) %in% names(Fraction.of.Cells.Above.Threshold) )) + + from_to <- from_to[names(Fraction.of.Cells.Above.Threshold)] + + # Split Fraction + ls.Fraction.of.Cells.Above.Threshold <- split(Fraction.of.Cells.Above.Threshold, f = from_to) + } + + + if(plot){ + if(is.null(caption)) caption <- pc_TRUE(is.na(Fraction.of.Cells.Above.Threshold), + suffix = "of idents yielded NA/NaN & exluded from plot.") + TTL <- paste("Percentage of Cells Above Threshold for", feature) + STL <- paste("Cells above threshold for", feature, "above", threshold) + SFX <- ppp(feature, "by", ident, "thr", threshold, "subset_ident", subset_ident, suffix) + + Fraction.of.Cells.Above.Threshold <- na.omit.strip(Fraction.of.Cells.Above.Threshold) + + if(box) { + + pobj <- qboxplot(ls.Fraction.of.Cells.Above.Threshold, + , plotname = TTL, subtitle = STL, caption = caption, suffix = SFX + , add = "dotplot", xlab.angle = 45 + , hide.legend = TRUE, + , ylab = ylab + # , xlab = ident + , ...) + + } else { "barplot" + pobj <- qbarplot(Fraction.of.Cells.Above.Threshold, label = percentage_formatter(Fraction.of.Cells.Above.Threshold), + plotname = TTL, subtitle = STL, caption = caption, suffix = SFX, + ylab = ylab + # , xlab = ident + , ...) + } + + + + print(pobj) + } + + return(Fraction.of.Cells.Above.Threshold) +} + + +# _________________________________________________________________________________________________ +#' @title PctCellsExpressingGenes +#' +#' @description Calculates the proportion of cells expressing one or more specified genes using a Seurat +#' object as input. +#' +#' @param genes A character vector specifying the genes of interest. Must be a non-empty character vector. +#' @param obj A Seurat object containing single-cell data. +#' @param assay The assay to use for expression data. Default: "RNA". +#' @param min.expr The minimum expression level to consider a gene as "expressed". Default: 1. +#' @param ident A categorical variable from the metadata of the Seurat object. If NULL, returns overall +#' proportions. Default: NULL. +#' @param max.idents Maximum number of unique values allowed in the `ident` variable. Default: 100. +#' +#' @return A named vector if `ident` is NULL, containing the proportion of cells co-expressing all genes +#' (AND), the proportion expressing any gene (OR), and the proportion expressing each gene individually. +#' If `ident` is provided, returns a matrix with rows representing categories and columns representing +#' expression proportions. +#' +#' @examples +#' \dontrun{ +#' # Load the Seurat object (example) +#' library(Seurat) +#' combined.obj <- readRDS("path/to/your/seurat_object.rds") +#' +#' # Define genes of interest +#' # Define genes of interest +#' genes <- c("TOP2A", "MAP2") +#' # Call the function +#' PctCellsExpressingGenes(genes = genes, obj = combined.obj) +#' # Call the function with ident +#' #' PctCellsExpressingGenes(genes = genes, obj = combined.obj, ident = "cluster") +#' } +#' +#' @importFrom Seurat GetAssayData +#' @export + +PctCellsExpressingGenes <- function(genes, obj, assay = "RNA", min.expr = 1, + ident = NULL, max.idents = 100) { + # Input assertions + stopifnot( + is.character(genes) && length(genes) > 0, # genes must be a non-empty character vector + inherits(obj, "Seurat"), # obj must be a Seurat object + is.character(assay) && length(assay) == 1, # assay must be a single character string + is.numeric(min.expr) && length(min.expr) == 1, # min.expr must be a single numeric value + is.null(ident) || (is.character(ident) && length(ident) == 1), # ident must be NULL or a single character string + is.numeric(max.idents) && length(max.idents) == 1 && max.idents > 0 # max.idents must be a single positive numeric value + ) + + # Message parameters to console + message("Parameters:") + message(" genes: ", paste(genes, collapse = ", ")) + message(" assay: ", assay) + message(" min.expr: ", min.expr) + message(" ident: ", ifelse(is.null(ident), "NULL", paste(ident, lenght(ident), "-", head(ident)))) + message(" max.idents: ", max.idents) + + # Get the expression data + expr.data <- Seurat::GetAssayData(obj, assay = assay, slot = "data") + + # Check if the genes are in the expression data + genes <- intersect(genes, rownames(expr.data)) + if (length(genes) == 0) { + stop("None of the specified genes are present in the expression data.") + } + + # Define a function to calculate proportions + calc_proportions <- function(expr.data, genes, min.expr) { + # Calculate the proportion of cells expressing each gene + expr.prop <- sapply(genes, function(gene) { + sum(expr.data[gene, ] >= min.expr) / ncol(expr.data) + }) + + # Calculate the proportion of cells co-expressing all genes (AND) + coexpr.prop <- sum(apply(expr.data[genes, ] >= min.expr, 2, all)) / ncol(expr.data) + + # Calculate the proportion of cells expressing any gene (OR) + orexpr.prop <- sum(apply(expr.data[genes, ] >= min.expr, 2, any)) / ncol(expr.data) + + # Return the proportions + return(c(coexpr.prop, orexpr.prop, expr.prop)) + } + + # Calculate proportions + proportions <- calc_proportions(expr.data, genes, min.expr) + + # Message to console + message(sprintf("Percentage of cells co-expressing all genes (AND): %.2f%%", proportions[1] * 100)) + message(sprintf("Percentage of cells expressing any gene (OR): %.2f%%", proportions[2] * 100)) + for (i in seq_along(genes)) { + message("gene: ", genes[i], " ...") + message(sprintf("Percentage of cells expressing %s: %.2f%%", genes[i], proportions[2 + i] * 100)) + } + + # If ident is NULL, return the proportions vector + if (is.null(ident)) { + names(proportions) <- c("Expr.ALL", "Expr.ANY", genes) + return(proportions) + } + + # Check ident + ident_vals <- obj@meta.data[[ident]] + if (is.null(ident_vals)) { + stop(sprintf("The ident '%s' is not present in the metadata.", ident)) + } + + if (length(unique(ident_vals)) > max.idents) { + stop(sprintf("The number of unique values in ident '%s' exceeds max.idents.", ident)) + } + + # Message ident details + message("Ident details:") + message(" Length of idents: ", length(unique(ident_vals))) + message(" Head of ident values: ", paste(head(unique(ident_vals)), collapse = ", ")) + + # Calculate proportions per ident + idents <- unique(ident_vals) + result_matrix <- matrix(NA, nrow = length(idents), ncol = length(proportions)) + rownames(result_matrix) <- idents + colnames(result_matrix) <- c("Expr.ALL", "Expr.ANY", genes) + + for (ident_value in idents) { + message("Cluster: ", ident_value) + cells_in_ident <- which(ident_vals == ident_value) + expr.data_subset <- expr.data[, cells_in_ident, drop = FALSE] + result_matrix[ident_value, ] <- calc_proportions(expr.data_subset, genes, min.expr) + } + + return(result_matrix) +} + + +# _________________________________________________________________________________________________ +# Barplots / Compositional analysis ______________________________ ---- +# _________________________________________________________________________________________________ + + +# _________________________________________________________________________________________________ + +#' @title Generate Barplot of Cell Fractions +#' +#' @description This function generates a bar plot of cell fractions per cluster from a Seurat object. +#' It offers the option to downsample data, equalizing the number of cells in each group +#' to the number in the smallest group. The plot's bars are grouped by one variable and filled by another. +#' The function supports custom color palettes, drawing numerical values on bars, and saving the plot. +#' +#' @param fill.by The variable to fill by for the bar plot. +#' @param group.by The variable to group by for the bar plot. +#' @param obj A Seurat object. +#' @param plotname The title of the plot. +#' @param min.nr.sampled.cells The minimal number of cells to sample from each identity class. Defaults to 200 cells. +#' @param downsample Logical indicating whether to downsample data to equalize group sizes. +#' @param prefix Optional prefix for the plot title. +#' @param suffix Optional suffix for the plot title. +#' @param sub_title Optional subtitle for the plot. +#' @param hlines Numeric vector specifying y-intercepts of horizontal lines to add to the plot. +#' @param return_table Logical; if TRUE, returns a contingency table instead of plotting. +#' @param save_table Logical; if TRUE, saves the table behind the plot. +#' @param save_plot Logical; if TRUE, saves the generated plot. +#' @param also.pdf Save plot in both png and pdf formats. +#' @param seedNr Seed for random number generation to ensure reproducibility. +#' @param draw_plot Logical; if FALSE, suppresses plotting (useful if only the table is desired). +#' @param show_numbers Logical; if TRUE, adds count numbers on top of each bar in the plot. +#' @param min.pct Show % Labels above this threshold. Default = 0.05, or above 5 pct. +#' @param cex.pct Font size of pct labels. +#' @param min_frequency Minimum fraction to display individually in the plot; smaller fractions +#' are aggregated into an "Other" category. +#' @param custom_col_palette Specifies whether to use a standard or custom color palette. +#' @param color_scale Defines the color scale to use for the plot if a custom palette is selected. +#' @param show.total.cells Show total cells +#' @param cex.total Label size for total cells +#' @param xlab.angle Angle of x-axis labels. +#' @param show_plot Logical; if TRUE, shows the plot. +#' @param w Width of the plot in inches. Default: `NULL` +#' @param h Height of the plot in inches. Default: `6` +#' @param ... Additional parameters passed to internally called functions. +#' +#' @return Depending on the value of `return_table`, either returns a ggplot object or a list +#' containing values and percentages tables. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' scBarplot.CellFractions(obj = combined.obj, group.by = "integrated_snn_res.0.1", fill.by = "Phase", downsample = TRUE) +#' scBarplot.CellFractions(obj = combined.obj, group.by = "integrated_snn_res.0.1", fill.by = "Phase", downsample = FALSE) +#' } +#' } +#' @seealso \code{\link[tools]{toTitleCase}}, \code{\link[ggplot2]{ggplot}}, \code{\link[dplyr]{group_by}}, \code{\link[dplyr]{summarise}} +#' @importFrom tools toTitleCase +#' @importFrom dplyr group_by summarise sample_n +#' @importFrom ggplot2 ggplot geom_bar geom_hline labs theme_classic theme element_text scale_fill_manual geom_text +#' +#' @export +scBarplot.CellFractions <- function( + fill.by, + group.by = GetNamedClusteringRuns()[1], + obj = combined.obj, + downsample = FALSE, + min.nr.sampled.cells = 200, + plotname = kppws("Cell proportions of", fill.by, "by", group.by), + suffix = NULL, + prefix = NULL, + sub_title = suffix, + hlines = c(.25, .5, .75), + return_table = FALSE, + save_table = TRUE, + save_plot = TRUE, + also.pdf = FALSE, + seedNr = 1989, + draw_plot = TRUE, + show_numbers = FALSE, + min.pct = 0.05, + cex.pct = 2.5, + min_frequency = 0, # 0.025, + custom_col_palette = FALSE, + color_scale = getDiscretePaletteObj(ident.used = fill.by, obj = obj, palette.used = "glasbey"), + rnd_colors = FALSE, + show.total.cells = TRUE, + cex.total = 2, + xlab.angle = 45, + show_plot = TRUE, + w = NULL, + h = 6, + ...) { + + # Input assertions + stopifnot( + inherits(obj, "Seurat"), # obj must be a Seurat object + is.numeric(min_frequency) && length(min_frequency) == 1 && min_frequency >= 0 && min_frequency < 1, # min_frequency must be between 0 and 1 + group.by %in% colnames(obj@meta.data), # group.by must be a valid column in the meta.data slot of the Seurat object + fill.by %in% colnames(obj@meta.data), # fill.by must be a valid column in the meta.data slot of the Seurat object + "To many categories for X axis (group.by)" = nr.unique(obj@meta.data[, group.by]) < 100 + ) + + META <- obj@meta.data + + if (is.null(w)) { + categ_X <- nr.unique(META[, group.by]) + categ_Y <- nr.unique(META[, fill.by]) + w <- ceiling(max(6, categ_Y/4, categ_X/2)) + } + + set.seed(seedNr) + pname.suffix <- capt.suffix <- NULL + + if (downsample) { + tbl_X <- table(META[[fill.by]]) + n_smallest_group <- min(tbl_X) + largest_grp <- max(tbl_X) + + message("The size of the smallest group is: ", n_smallest_group, " cells.") + + dsample.to.repl.thr <- (n_smallest_group < min.nr.sampled.cells) # if less than 200 cells are sampled, sample with replacement + if (dsample.to.repl.thr) { + message(paste( + "If smallest category is <", min.nr.sampled.cells, + "of total cells, than down- or up-sampling, with replacement to that minimum." + )) + } + + # Update plot name and caption to reflect downsampling + plotname <- kpp(plotname, "downsampled") + pname.suffix <- "(downsampled)" + + capt.suffix <- paste0( + "\nDownsampled all groups in ", fill.by, " (Y) to ", min.nr.sampled.cells, + " cells before splitting by X. \nThis number is max(smallest group, 5% of total cells). Largest groups previosly was: ", largest_grp + ) + } + + + # Construct the caption based on downsampling and minimum frequency + PFX <- if (show_numbers) "Numbers denote # cells." else percentage_formatter(min.pct, prefix = "Labeled above") + caption_ <- paste("Top: Total cells per bar. |", PFX, capt.suffix) + + if (min_frequency > 0) caption_ <- paste(caption_, "\nCategories <", percentage_formatter(min_frequency), "are shown together as 'Other'") + pname_ <- paste(plotname, pname.suffix) + + + # Create a contingency table of the data + contingency.table <- table(META[, group.by], META[, fill.by]) + print(contingency.table) + + + if (show.total.cells) { + # First, calculate the total counts per group + totals <- META |> + group_by(!!sym(group.by)) |> + summarise(Total = n()) |> + ungroup() + + # Merge totals back with the original data for labeling + group_by_column <- group.by + META <- META |> + left_join(totals, by = setNames(nm = group_by_column, group_by_column)) + } + + + if (draw_plot) { + # calculate the proportions and add up small fractions + prop_table <- META |> + group_by(!!as.name(fill.by)) |> + summarise(proportion = n() / nrow(META)) |> + mutate("category" = ifelse(proportion < min_frequency, "Other", as.character(!!as.name(fill.by)))) + + categories <- unique(prop_table$"category") + n.categories <- length(categories) + message(n.categories, " Y-categories present: ", kppc(sort(categories))) + + # join the proportions back to the original data + META <- left_join(META, prop_table, by = fill.by) + + subtt <- kppws(group.by, "|", ncol(obj), "cells", sub_title) + + + if (downsample) { + # Downsample the data + META <- + META |> + group_by(!!sym(fill.by)) |> + sample_n( + size = max(n_smallest_group, min.nr.sampled.cells), + replace = dsample.to.repl.thr + ) |> + ungroup() + + contingency.table <- table(META[[group.by]], META[[fill.by]]) + contingency.table <- addmargins(contingency.table) + print(contingency.table) + } + + # Plot the data + pl <- META |> + group_by(!!sym(group.by)) |> + ggplot(aes(fill = category, x = !!sym(group.by))) + + geom_hline(yintercept = hlines, lwd = 1.5) + + geom_bar(position = "fill") + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + labs( + title = pname_, subtitle = subtt, + x = group.by, y = "Fraction of Cells", + fill = fill.by, caption = caption_ + ) + + theme_classic() + + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) + + scale_y_continuous(limits = c(0, 1), expand = expansion(mult = c(0, 0.1))) + + theme(axis.text.x = element_text(angle = xlab.angle, hjust = 1)) + + # Apply custom color palette if specified + if (!isFALSE(custom_col_palette)) { + stopifnot("Length(custom_col_palette) should be >= nr. categories displayed." = length(custom_col_palette) >= n.categories) + + all_categs_have_a_col <- all(categories %in% names(custom_col_palette)) + if(all_categs_have_a_col) { + colz_manual <- custom_col_palette[categories] + } else { + colz_manual <- custom_col_palette[seq(categories)] + } # end if all_categs_have_a_col + pl <- pl + scale_fill_manual(values = colz_manual) + + } else if (rnd_colors) { + colz_manual <- sample(rainbow(n.categories)) + pl <- pl + scale_fill_manual(values = colz_manual) + } # end if custom_col_palette / rnd_colors + + + + if (show_numbers) { + pl <- pl + geom_text(aes(label = ..count..), + stat = "count", position = position_fill(vjust = 0.5) + ) + } else { + pl <- pl + geom_text( + aes(label = ifelse((..count.. / tapply(..count.., ..x.., sum)[..x..]) >= min.pct, + scales::percent(..count.. / tapply(..count.., ..x.., sum)[..x..], accuracy = 1), + "" + )), + stat = "count", position = position_fill(vjust = 0.5), + size = cex.pct + ) + } + + if (show.total.cells) { + pl <- pl + geom_text( + data = totals, aes(x = !!sym(group.by), y = 1, label = Total), + vjust = -0.5, size = cex.total, inherit.aes = FALSE + ) + } + + if (save_plot) { + # sfx <- shorten_clustering_names(group.by) + sfx <- if (!is.null(suffix)) suffix else NULL + if (min_frequency) sfx <- sppp(sfx, min_frequency) + qqSave( + ggobj = pl, title = FixPlotName(plotname), also.pdf = also.pdf, w = w, h = h, + suffix = sppp(sfx, "fr.barplot") + # , ... + ) + } # save_plot + } # draw_plot + + # Compile contingency table and its frequencies + CT_freq_sc <- list( + "values" = contingency.table, + "percentages" = CodeAndRoll2::rowDivide(mat = contingency.table, vec = rowSums(contingency.table)) + ) + + if (save_table) { + ReadWriter::write.simple.xlsx(CT_freq_sc, filename = sppp(FixPlotName(plotname), suffix, "fr.barplot") + # suffix = sppp(FixPlotName(plotname), "fr.barplot") + ) + } + + # Return contingency table or plot based on return_table flag + if(show_plot) print(pl) + + if (return_table) { + return(CT_freq_sc) + } else { + invisible(pl) + } # end if return_table +} + + + + + +# _________________________________________________________________________________________________ +#' @title Barplot of Fraction of Cells per Cluster +#' +#' @description Visualizes the fraction of cells within each cluster through a barplot. +#' +#' @param obj Seurat object for analysis. Default: `combined.obj`. +#' @param ident Cluster identity. Used to specify which clustering results to visualize. +#' Default: First entry from ordered clustering runs. +#' @param sort If TRUE, sorts clusters by size. Default: `FALSE`.. +#' @param title Title for the plot. Default: "Cells per Identity Group". +#' @param sub Subtitle for the plot. Default: "identity". +#' @param label If TRUE, shows cell count or percentage based on the label vector. Default: `TRUE`.. +#' @param palette Color palette for the barplot. Default: 'glasbey'. +#' @param return_table If TRUE, returns the data used for plotting instead of the plot itself. Default: `FALSE`.. +#' @param min.cells Minimum cell count threshold for categories. Adjusted by data size. +#' @param suffix Optional suffix for file naming. Used in conjunction with `kpp`. +#' @param ylab_adj Adjustment factor for y-axis label positioning. Default: 1.1. +#' @param ... Additional parameters for internal function calls. +#' +#' @examples +#' \dontrun{ +#' scBarplot.CellsPerCluster() +#' scBarplot.CellsPerCluster(sort = TRUE) +#' } +#' @export scBarplot.CellsPerCluster +#' +#' @importFrom ggExpress qbarplot + +scBarplot.CellsPerCluster <- function( + obj = combined.obj, + ident = GetOrderedClusteringRuns(obj = obj)[1], + sort = FALSE, + plotname = "Cells per Identity Group", + sub = ident, + label = list(TRUE, "percent")[[1]], + suffix = if (label == "percent") "percent" else NULL, + palette = c("alphabet", "alphabet2", "glasbey", "polychrome", "stepped")[3], + return_table = FALSE, + ylab_adj = 1.1, + min.cells = round(ncol(obj) / 100), + ...) { + + stopifnot( + inherits(obj, "Seurat"), is.character(ident), is.logical(sort), is.character(plotname), is.character(sub), + is.logical(label), is.character(suffix) | is.null(suffix), is.character(palette), is.logical(return_table), + is.numeric(ylab_adj), is.numeric(min.cells), ident %in% colnames(obj@meta.data) + ) + + cat(0) + cell.per.cl <- obj[[ident]][, 1] + cell.per.cluster <- (table(cell.per.cl)) + if (sort) cell.per.cluster <- sort(cell.per.cluster) + lbl <- if (isFALSE(label)) { + NULL + } else if (label == "percent") { + percentage_formatter(cell.per.cluster / sum(cell.per.cluster)) + } else if (isTRUE(label)) { + cell.per.cluster + } else { + label + } + + min.PCT.cells <- min.cells / ncol(obj) + message("min cell thr: ", min.cells, " corresponding to min: ", percentage_formatter(min.PCT.cells)) + + n.clusters <- length(cell.per.cluster) + nr.cells.per.cl <- table(obj[[ident]][, 1]) + + SBT <- pc_TRUE(nr.cells.per.cl < min.cells, NumberAndPC = TRUE, + suffix = paste("of identites are below:", min.cells, "cells, or", percentage_formatter(min.PCT.cells), "of all cells." ) + ) + + pl <- ggExpress::qbarplot(cell.per.cluster, + plotname = plotname, + subtitle = paste0(sub, "\n", SBT), + suffix = kpp(ident, ncol(obj),"c", suffix), + col = 1:n.clusters, + caption = .parseBasicObjStats(obj = obj), + xlab.angle = 45, + ylim = c(0, ylab_adj * max(cell.per.cluster)), + label = lbl, + ylab = "Cells", + palette_use = DiscretePaletteSafe(n = n.clusters, palette.used = palette), + ... + ) + + if (return_table) { + print(pl) + return(cell.per.cluster) + } else { + return(pl) + } +} + +# _________________________________________________________________________________________________ +#' @title Cluster Size Distribution Plot (Barplot or Histogram) +#' +#' @description Generates a bar plot or histogram to visualize the size distribution of clusters +#' within a Seurat object, based on the specified clustering identity. +#' +#' @param obj Seurat object for analysis. Default: `combined.obj`. +#' @param ident Clustering identity to base the plot on. +#' Default: The second entry from `GetClusteringRuns()`. +#' @param plot Whether to display the plot (TRUE) or return cluster sizes (FALSE). Default: `TRUE`.. +#' @param thr.hist Threshold for switching from a bar plot to a histogram based on the number of +#' clusters. Default: 30. +#' @param ... Extra parameters for the plot. +#' +#' @examples +#' \dontrun{ +#' plotClustSizeDistr() +#' } +#' +#' @importFrom ggExpress qbarplot qhistogram +#' +#' @export +plotClustSizeDistr <- function( + obj = combined.obj, ident, + plot = TRUE, thr.hist = 30, ...) { + stopifnot(ident %in% colnames(obj@meta.data)) + + clust.size.distr <- table(obj@meta.data[, ident]) + print(clust.size.distr) + resX <- gsub(pattern = ".*res\\.", replacement = "", x = ident) + ptitle <- paste("Cluster sizes at ", ident) + psubtitle <- paste( + "Nr.clusters:", length(clust.size.distr), + "| median size:", median(clust.size.distr), + "| CV:", percentage_formatter(cv(clust.size.distr)) + ) + xlb <- "Cluster size (cells)" + ylb <- "Nr of Clusters" + xlim <- c(0, max(clust.size.distr)) + + if (plot) { + if (length(clust.size.distr) < thr.hist) { + ggExpress::qbarplot(clust.size.distr, + plotname = ptitle, subtitle = psubtitle, + label = clust.size.distr, xlab = "Clusters", ylab = xlb, ... + ) + } else { + ggExpress::qhistogram( + vec = clust.size.distr, plotname = ptitle, subtitle = psubtitle, + xlab = xlb, ylab = ylb, xlim = xlim, ... + ) + } + } else { + "return vector" + clust.size.distr + } +} + + +# _________________________________________________________________________________________________ +#' @title Barplot the Fraction of Cells Above Threshold per Cluster +#' +#' @description Generates a bar plot depicting the percentage of cells within each cluster that +#' exceed a specified threshold, based on a selected metadata column. +#' +#' @param value.col Column in metadata with values to assess against `thrX`. Default: 'percent.ribo'. +#' @param thrX Threshold for calculating the fraction of cells. Default: 0.3. +#' @param obj Seurat object with single-cell data. Default: `combined.obj`. +#' @param id.col Cluster identity column in metadata. Default: 'cl.names.top.gene.res.0.3'. +#' @param return.df Whether to return the underlying data frame instead of the plot. Default: `FALSE`.. +#' @param label Whether to add labels to the bar plot. Default: NULL. +#' @param subtitle Optional subtitle for the plot. +#' @param suffix Suffix for the output file name. +#' @param above Whether to calculate the fraction of cells above or below the threshold. Default: `TRUE`.. +#' @param ... Additional parameters for plotting functions. +#' +#' @examples +#' \dontrun{ +#' scBarplot.FractionAboveThr(id.col = "cl.names.top.gene.res.0.3", value.col = "percent.ribo", thrX = 0.2) +#' } +#' +#' @seealso \code{\link[dplyr]{select}}, \code{\link[dplyr]{group_by}} +#' +#' @importFrom dplyr select group_by summarize +#' +#' @export +scBarplot.FractionAboveThr <- function( + value.col = "percent.ribo", + thrX = 0.1, + obj = combined.obj, + id.col = GetClusteringRuns(obj)[1], + subtitle = id.col, + return.df = FALSE, + label = NULL, + suffix = NULL, + above = TRUE, + ylim = c(0, 100), # set to null for relative y axis + ...) { + stopifnot(value.col %in% colnames(obj@meta.data)) + + meta <- obj@meta.data + metacol <- meta |> + dplyr::select(c(id.col, value.col)) + + (df_cells_above <- metacol |> + dplyr::group_by(!!sym(id.col)) |> + summarize( + n_cells = n(), + n_cells_above = sum(!!sym(value.col) > thrX), + fr_n_cells_above = n_cells_above / n_cells, + fr_n_cells_below = 1 - fr_n_cells_above + ) + ) + + + pass <- + if (above) { + metacol[, value.col] > thrX + } else { + metacol[, value.col] < thrX + } + total_average <- iround(100 * mean(pass)) + + df_2vec <- + if (above) { + df_cells_above[, c(1, 4)] + } else { + df_cells_above[, c(1, 5)] + } + + + (v.fr_n_cells_above <- 100 * deframe(df_2vec)) + + tag <- if (above) "above" else "below" + if (is.null(label)) label <- percentage_formatter(deframe(df_2vec), digitz = 2) + + pname <- paste("Pc. cells", tag, value.col, "of", thrX) + ggobj <- ggExpress::qbarplot(v.fr_n_cells_above, + label = label, + plotname = pname, + filename = FixPlotName(kpp(pname, id.col, ".pdf")), + suffix = suffix, + subtitle = subtitle, + caption = paste( + "Overall average (black line):", iround(total_average), "% |", + substitute(obj) + ), + xlab.angle = 45, + xlab = "Clusters", + ylab = paste("% Cells", tag, "thr. (", value.col, ")"), + ylim = ylim, + hline = total_average, + ... + ) + print(ggobj) + if (return.df) { + return(df_cells_above) + } else { + ggobj + } +} + + +# _________________________________________________________________________________________________ +#' @title Fraction of Cells Below Threshold per Cluster +#' +#' @description Generates a bar plot to visualize the percentage of cells within each cluster that +#' fall below a specified threshold, according to a metadata column value. +#' Inherits all parameters from `scBarplot.FractionAboveThr` with the exception that `above` is set to FALSE. +#' +#' @param thrX Threshold value for assessing cell counts. Default: 0.01. +#' @param value.col Metadata column with values for threshold comparison. Default: 'percent.ribo'. +#' @param id.col Cluster identifier in metadata. Default: 'cl.names.top.gene.res.0.3'. +#' @param obj Seurat object with cell data. Default: `combined.obj`. +#' @param return.df If TRUE, returns the data frame instead of the plot. Default: `FALSE`.. +#' +#' @examples +#' \dontrun{ +#' scBarplot.FractionBelowThr(id.col = "cl.names.top.gene.res.0.3", value.col = "percent.ribo", thrX = 0.01) +#' } +#' +#' @seealso `scBarplot.FractionAboveThr` +#' @seealso \code{\link[dplyr]{select}}, \code{\link[dplyr]{group_by}} +#' +#' @importFrom dplyr select group_by summarize +#' +#' @export +scBarplot.FractionBelowThr <- function( + thrX = 0.2, + value.col = "percent.ribo", + id.col = "cl.names.top.gene.res.0.3", + obj = combined.obj, + return.df = FALSE, + subtitle = id.col, + suffix = NULL, + ...) { + scBarplot.FractionAboveThr( + thrX = thrX, + value.col = value.col, + id.col = id.col, + obj = obj, + return.df = return.df, + subtitle = subtitle, + suffix = suffix, + above = FALSE # Set `above` argument to FALSE to get fraction below threshold + ) +} + +# _________________________________________________________________________________________________ +# Pie Charts / Compositional analysis ______________________________ ---- +# _________________________________________________________________________________________________ +#' @title scPieClusterDistribution +#' +#' @description This function generates a pie chart of cluster distributions for a given clustering +#' identity in a single-cell RNA-seq object. +#' +#' @param obj A single-cell RNA-seq object. Default: `combined.obj`. +#' @param ident A character string specifying the clustering identity to be used. Default: the first +#' clustering run in the object. +#' @param ... Additional arguments passed to other methods. +#' +#' @return A pie chart displaying the cluster distribution. +#' @importFrom ggplot2 ggplot aes geom_bar coord_polar theme_minimal +#' @importFrom scales percent +#' +#' @examples +#' \dontrun{ +#' scPieClusterDistribution(obj = combined.obj, ident = 'cluster_identity') +#' } +scPieClusterDistribution <- function(obj = combined.obj, ident = GetClusteringRuns(obj)[1], + ...) { + + # Input assertions + stopifnot( + is(obj, "Seurat"), is.character(ident), length(ident) == 1, + ident %in% colnames(obj@meta.data) + ) + + # Compute cluster sizes + cluster_sizes <- table(obj[[ident]]) + print(cluster_sizes) + + # Create pie chart + qpie(cluster_sizes, caption = .parseBasicObjStats(obj), subtitle = ident) +} + + + + +# _________________________________________________________________________________________________ +# List of Seurat Objects ______________________________ ---- +# _________________________________________________________________________________________________ + + + + + + +# _________________________________________________________________________________________________ +#' @title Barplot of Cells Per Seurat Object +#' +#' @description Visualizes the number of cells in each Seurat object within a list, showing the +#' distribution of cell counts across different datasets or experimental conditions. +#' +#' @param ls.obj List of Seurat objects to analyze. Default: `ls.Seurat`. +#' @param plotname Title for the plot. Default: 'Nr.Cells.After.Filtering'. +#' @param xlab.angle Angle for x-axis labels, enhancing readability. Default: 45. +#' @param names Optionally provide custom names for x-axis labels. If FALSE, uses object names +#' from `ls.obj`. Default: `FALSE`.. +#' @param ... Extra parameters passed to `qbarplot`. +#' +#' @examples +#' \dontrun{ +#' ls.obj <- list(obj, obj) +#' scBarplot.CellsPerObject(ls.obj) +#' } +#' +#' @export scBarplot.CellsPerObject + +scBarplot.CellsPerObject <- function( + ls.obj = ls.Seurat, + plotname = "Nr.Cells.After.Filtering", + xlab.angle = 45, + names = FALSE, ...) { + stopifnot( + "Should be run on a list of Seu. objects" = length(ls.obj) > 1, + "Should be run on a list of Seu. objects" = all(sapply(ls.obj, is, "Seurat")) + ) + + cellCounts <- sapply(ls.obj, ncol) + names(cellCounts) <- if (length(names) == length(ls.obj)) names else names(ls.obj) + ggExpress::qbarplot(cellCounts, + plotname = plotname, + subtitle = paste(sum(cellCounts), "cells in total"), + label = cellCounts, + xlab.angle = xlab.angle, + ylab = "Cells", + ... + ) +} + + + +# _________________________________________________________________________________________________ +#' @title Stacked Barplot of Metadata Categories for List of Seurat Objects +#' +#' @description Creates and saves a stacked barplot for a specified metadata category +#' from a list of Seurat objects. +#' +#' @param ls.obj List of Seurat objects. +#' @param meta.col The metadata column name to be used for the barplot. +#' @param ... Additional arguments passed to `ggExpress::qbarplot.df`. + +#' +#' @return A ggplot object representing the stacked barplot. +#' +#' @examples +#' \dontrun{ +#' ls.obj <- list(obj, obj) +#' scBarplotStackedMetaCateg_List(ls.obj, meta.col = orig.ident) +#' } +#' +#' @importFrom ggExpress qbarplot.df +#' @importFrom dplyr group_by summarise select +#' +#' @export +scBarplotStackedMetaCateg_List <- function( + ls.obj, + meta.col, # e.g. orig.ident + ...) { + stopifnot( + "Should be run on a list of Seu. objects" = length(ls.obj) > 1, + "Should be run on a list of Seu. objects" = all(sapply(ls.obj, is, "Seurat")), + length(meta.col) == 1, + "meta.col not found in 1st object" = meta.col %in% colnames(ls.obj[[1]]@meta.data) + ) + + # Creating a data frame for the stacked bar plot + df <- do.call(rbind, lapply(seq_along(ls.obj), function(x) { + data.frame( + Sample = names(ls.obj)[x], + Category = ls.obj[[x]]@meta.data[[meta.col]], + stringsAsFactors = FALSE + ) + })) + + # Summarizing to get counts of cells per category for each sample + df <- df |> + dplyr::group_by(Sample, Category) |> + dplyr::summarise(Cells = n(), .groups = "drop") |> + dplyr::select(Sample, Cells, Category) + + TTL <- paste(meta.col, "per object") + p <- ggExpress::qbarplot.df(df, + plotname = TTL, + scale = TRUE, hide.legend = FALSE, + ... + ) + print(p) + return(df) +} + + +# _________________________________________________________________________________________________ +# Colors ______________________________ ---- +# _________________________________________________________________________________________________ +#' @title Reproduce the ggplot2 default color palette +#' +#' @description Generates a vector of colors that emulates the default color palette used by ggplot2. +#' This function is useful for creating color sets for custom plotting functions or for applications +#' outside of ggplot2 where a similar aesthetic is desired. +#' +#' @param n Integer specifying the number of distinct colors to generate. +#' +#' @return A character vector of color values in hexadecimal format, replicating the default hue +#' color scale of ggplot2. +#' +#' @examples +#' \dontrun{ +#' # Generate a palette of 5 colors +#' print(gg_color_hue(5)) +#' } +#' +#' @export +gg_color_hue <- function(n) { + hues <- seq(15, 375, length = n + 1) + hcl(h = hues, l = 65, c = 100)[1:n] +} + + + +# _________________________________________________________________________________________________ +#' @title Safely generate a discrete color palette (NA). +#' +#' @description Safe wrapper around Seurat's DiscretePalette(), which returns NA's if too many +#' categories are requested +#' @param ident.used The identity column used for determining the number of clusters, Default: GetClusteringRuns()[1] +#' @param obj Seurat object, Default: combined.obj +#' @param palette.used The name of the palette to use, Default: c("alphabet", "alphabet2", +#' "glasbey", "polychrome", "stepped")[1] +#' @param show.colors Whether to display the colors in the palette, Default: `FALSE`. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' getDiscretePalette() +#' } +#' } +#' +#' @export +getDiscretePalette <- function() .Deprecated("DiscretePaletteSafe and DiscretePaletteObj") + + +# _________________________________________________________________________________________________ +#' @title Generate a Discrete Color Palette for Seurat Clusters +#' +#' @description Generates a discrete color palette for visualizing clusters in a Seurat object, +#' using a specified identity column to determine the number of unique clusters. +#' +#' @param ident.used Identity column in the Seurat object to base the color palette on. +#' @param obj Seurat object containing clustering information. +#' @param palette.used The palette name to use for color generation. Options include "alphabet", +#' "alphabet2", "glasbey", "polychrome", and "stepped". Default: "alphabet2". +#' @param show.colors If TRUE, displays the generated colors. Default: `FALSE`.. +#' @param seed Seed for random color generation, ensuring reproducibility. Default: 1989. +#' +#' @return A character vector of color values corresponding to the number of clusters. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' ident.used <- "resolution_1" +#' obj <- YourSeuratObject +#' colors <- getDiscretePaletteObj(ident.used = ident.used, obj = obj) +#' print(colors) +#' } +#' } +#' +#' @export +getDiscretePaletteObj <- function(ident.used, + obj, + palette.used = c("alphabet", "alphabet2", "glasbey", "polychrome", "stepped")[2], + show.colors = FALSE, + seed = 1989) { + stopifnot( + is.character(ident.used), is(obj, "Seurat"), + is.character(palette.used), is.logical(show.colors), is.numeric(seed) + ) + + categs <- unique(unlist(obj[[ident.used]])) + n.clusters <- length(categs) + + colorz <- DiscretePaletteSafe( + n = n.clusters, + palette.used = palette.used, + show.colors = show.colors, + seed = seed + ) + names(colorz) <- categs + + return(colorz) +} + + +# _________________________________________________________________________________________________ +#' @title Safely generate a Discrete color palette. +#' +#' @description Generates a discrete color palette, ensuring no NA values are included, suitable +#' for visualizations where a specific number of distinct, reproducible colors is needed. +#' +#' @param n Number of colors to generate. +#' @param palette.used Palette name to use for generating colors. Options include "alphabet", +#' "alphabet2", "glasbey", "polychrome", "stepped". Default: "alphabet2". +#' @param show.colors If TRUE, displays the generated color palette. Default: `FALSE`.. +#' @param seed Seed value for reproducibility, especially when random color generation is involved. +#' Default: 1989. +#' +#' @return Character vector of HEX color values. +#' +#' @examples +#' \dontrun{ +#' colors <- DiscretePaletteSafe(n = 10) +#' print(colors) +#' } +#' +#' @importFrom gplots rich.colors +#' @importFrom Seurat DiscretePalette +#' +#' @export +DiscretePaletteSafe <- function(n, + palette.used = c("alphabet", "alphabet2", "glasbey", "polychrome", "stepped")[2], + show.colors = FALSE, + seed = 1989) { + stopifnot( + is.numeric(n), n > 0, is.character(palette.used), + is.logical(show.colors), is.numeric(seed) + ) + + colorz <- Seurat::DiscretePalette(n = n, palette = palette.used) + + if (anyNA(colorz)) { + colorsOK <- colorz[!is.na(colorz)] + n.colz <- length(colorsOK) + + msg <- paste( + "More categories then present in the palette", n, "vs.", + n.colz, "in", palette.used, "-> recycling." + ) + warning(msg, immediate. = TRUE) + + set.seed(seed) + if (n > 10 * n.colz) { + colorz <- sample(gplots::rich.colors(n)) + } else { + colorz <- sample(x = colorsOK, size = n, replace = TRUE) + } + + stopifnot(!anyNA(colorz)) + } + + if (show.colors) MarkdownHelpers::color_check(colorz) + return(colorz) +} + + +# _________________________________________________________________________________________________ +#' @title Regenerate Cluster Colors from a Seurat Object +#' +#' @description Regenerate and optionally displays the color scheme associated with the clusters +#' in a Seurat object as defined by a specified identity column. +#' +#' @param obj Seurat object containing clustering information. +#' @param use_new_palettes Logical indicating whether to use custom palettes defined in +#' `DiscretePalette` function. Default: `TRUE`.. +#' @param palette Name of the color palette to use if `use_new_palettes` is TRUE. +#' Options: "alphabet", "alphabet2", "glasbey", "polychrome", "stepped". Default: "glasbey". +#' @param ident Clustering identity to use for coloring. Retrieved from the first entry +#' of `GetClusteringRuns()` by default. +#' @param show If TRUE, displays a plot showing the color mapping for each cluster. Default: `TRUE`.. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' getClusterColors(obj = combined.obj, ident = GetClusteringRuns(combined.obj)[1]) +#' } +#' } +#' @export +#' +#' @importFrom scales hue_pal +getClusterColors <- function( + obj = combined.obj, + use_new_palettes = TRUE, + palette = c("alphabet", "alphabet2", "glasbey", "polychrome", "stepped")[3], + ident = GetClusteringRuns()[1], + show = TRUE) { + (identities <- levels(as.factor(obj[[ident]][, 1]))) + n.clusters <- length(unique(obj[[ident]][, 1])) + color_palette <- if (use_new_palettes) { + DiscretePalette(n = n.clusters, palette = palette) + } else { + scales::hue_pal()(length(identities)) + } + + names(color_palette) <- (identities) + identvec <- obj[[ident]][, 1] + colz <- color_palette[identvec] + names(colz) <- identvec + if (show) MarkdownHelpers::color_check(unique(colz)) + colz +} + + + +# _________________________________________________________________________________________________ +#' @title Regenerate Color Scheme for Clusters in Seurat Object as a vector +#' +#' @description Extracts and optionally displays the color scheme assigned to cluster identities +#' within a Seurat object, facilitating consistent color usage across visualizations. You can +#' check results in a barplot with `MarkdownHelpers::color_check()`. +#' +#' @param ident Specific clustering identity to use for color extraction. +#' If NULL, the active identity in `obj` is used. Default: NULL. +#' @param obj Seurat object from which to extract cluster colors. +#' Default: `combined.obj`. +#' @param plot.colors If TRUE, visually displays the color scheme. +#' Default: `FALSE`.. +#' @param simple If TRUE, returns only the unique set of colors used. +#' If FALSE, returns a named vector mapping cluster identities to colors. +#' Default: `FALSE`.. +#' +#' @examples +#' \dontrun{ +#' # Display colors for the active identity +#' SeuratColorVector() +#' # Retrieve and plot colors for a specified clustering identity +#' SeuratColorVector(ident = "RNA_snn_res.1", plot.colors = TRUE) +#' } +#' +#' @seealso \code{\link[scales]{hue_pal}} +#' +#' @export +#' @importFrom scales hue_pal +SeuratColorVector <- function(ident = NULL, obj = combined.obj, plot.colors = FALSE, simple = FALSE) { + if (!is.null(ident)) { + print(ident) + ident.vec <- obj[[ident]][, 1] + } else { + print(obj@active.ident) + ident.vec <- obj@active.ident + } + ident.vec <- as.factor(ident.vec) + print(table(ident.vec)) + colorlevels <- scales::hue_pal()(length(levels(ident.vec))) + if (plot.colors) color_check(colorlevels) + if (simple) { + colorlevels + } else { + CodeAndRoll2::translate( + vec = as.character(ident.vec), + old = levels(ident.vec), + new = colorlevels + ) + } +} + + +# _________________________________________________________________________________________________ +# Metadata Heatmaps ______________________________ ---- +# _________________________________________________________________________________________________ + + +#' @title Plot and Save Heatmaps from Metadata Calculation Results +#' +#' @description Generates and saves heatmap visualizations for each metric in the results obtained +#' from metadata calculations, such as `calculateAverageMetaData() - mean or median values of +#' specified features across different categories. +#' +#' @param results A list containing data frames with calculated metrics for each specified +#' metadata feature, grouped by categories. Typically, this is the output from a +#' function like `calculateAverageMetaData()`. +#' @param path The directory path where the heatmap images will be saved. +#' Defaults to the current working directory (`getwd()`). +#' @param file.prefix A prefix for the filenames of the saved heatmap images. +#' Defaults to "heatmap_". +#' @param scale Character indicating if the values should be scaled in the row direction, +#' column direction, both ('row', 'column', 'none'). Defaults to "column". +#' @param cluster_rows Logical indicating whether to cluster rows. Defaults to FALSE. +#' @param show_rownames Logical indicating whether to show row names. Defaults to TRUE. +#' @param show_colnames Logical indicating whether to show column names. Defaults to TRUE. +#' @param ... Additional arguments passed to `pheatmap::pheatmap`. +#' +#' @details This function loops through each metric in the `results`, creates a heatmap +#' for it using `pheatmap`, and saves the heatmap as a PNG file in the specified path. +#' The file names will start with the provided `file.prefix`, followed by the metric name. +#' +#' @examples +#' # Assuming `results` is the output from `calculateAverageMetaData`: +#' results <- calculateAverageMetaData(obj = combined.obj) +#' plotAndSaveHeatmaps(results, path = "path/to/save/heatmaps", file.prefix = "myData_") +#' +#' @return Invisible. The function primarily generates and saves files without returning data. +#' +#' @export +plotAndSaveHeatmaps <- function(results, path = getwd(), + file.prefix = "heatmap_", + scale = "column", + cluster_rows = FALSE, + display_numbers = TRUE, + show_rownames = TRUE, + show_colnames = TRUE, + rowname_column = 1, + ...) { + stopifnot(is.list(results), is.character(file.prefix), is.character(path) ) + + for (mt in names(results)) { + res <- results[[mt]] + stopifnot( !anyNA(res[[rowname_column]]), + !anyNaN(res[[rowname_column]]) + ) + + # Generate heatmap plot + x <- ReadWriter::column.2.row.names(results[[mt]], rowname_column = rowname_column ) + pobj <- pheatmap::pheatmap(mat = x, + main = paste("Heatmap of", mt, "values"), + scale = "column", + cluster_rows = cluster_rows, + display_numbers = display_numbers, + show_rownames = show_rownames, + show_colnames = show_colnames + ) + + # Construct file name + file_name <- paste0(file.prefix, mt, ".png") + file_path <- file.path(path, file_name) + + # Save plot + MarkdownReports::wplot_save_pheatmap(x = pobj, data = x, plotname = file_name, + png = TRUE, pdf = FALSE, ...) + cat("Saved heatmap for", mt, "to", file_path, "\n") + } # for +} + + +# _________________________________________________________________________________________________ +# plotting generic, misc ______________________________ ---- +# _________________________________________________________________________________________________ + + +# _________________________________________________________________________________________________ +#' @title Scatter Plot of Two Features in Seurat Object +#' +#' @description Generates a scatter plot comparing two features (genes or metrics) from a Seurat +#' object and optionally saves it. The function wraps around Seurat's `FeatureScatter` for +#' enhanced usability, including optional logarithmic transformations and saving capabilities. +#' +#' @param feature1 The first feature for the scatter plot's x-axis. +#' @param feature2 The second feature for the scatter plot's y-axis. +#' @param obj Seurat object containing the data for features. +#' @param ext File extension for saving the plot, if enabled. +#' @param plot Flag to display the plot within the R session. +#' @param logX Apply logarithmic transformation to x-axis values. +#' @param logY Apply logarithmic transformation to y-axis values. +#' @param ... Additional parameters passed to Seurat's `FeatureScatter`. +#' +#' @return A `ggplot` object of the feature scatter plot if `plot` is TRUE. +#' +#' @examples +#' \dontrun{ +#' # Generate and display a scatter plot for features TOP2A and ID2 +#' qFeatureScatter(feature1 = "TOP2A", feature2 = "ID2", obj = yourSeuratObject) +#' } +#' +#' @seealso \code{\link[Seurat]{FeatureScatter}}, \code{\link[ggplot2]{ggplot}} +#' +#' @export +#' @importFrom ggExpress qqSave +#' @importFrom Seurat FeatureScatter +#' @importFrom ggplot2 ggtitle theme_linedraw scale_x_log10 scale_y_log10 +qFeatureScatter <- function( + feature1 = "TOP2A", feature2 = "ID2", obj = combined.obj, + ext = "png", plot = TRUE, + logX = FALSE, logY = FALSE, + ...) { + plotname <- kpp(feature1, "VS", feature2) + p <- FeatureScatter(object = obj, feature1 = feature1, feature2 = feature2, ...) + + ggtitle(paste("Correlation", plotname)) + + theme_linedraw() + + if (logX) p <- p + scale_x_log10() + if (logY) p <- p + scale_y_log10() + + # fname <- kpp("FeatureScatter", plotname) + ggExpress::qqSave(ggobj = p, title = plotname, ext = ext, w = 8, h = 5) + if (plot) p +} + + +# _________________________________________________________________________________________________ +#' @title Create a Violin Plot for a Seurat Object Feature and save the file. +#' +#' @description Generates a violin plot for a specified feature in a Seurat object, +#' allowing for the data to be split by a specified grouping variable. +#' The function supports customization options such as logarithmic scaling, custom titles, and more. +#' +#' @param obj A Seurat object to be plotted. +#' @param feature A character string specifying the name of the feature to plot. +#' @param ident A character vector specifying the identities to be used in the plot. +#' @param split.by A character string specifying the grouping variable for splitting the plot. +#' @param colors A character vector specifying the colors to use for the plot. +#' @param clip.outliers A logical indicating whether to clip outliers. +#' @param replace.na A logical indicating whether NA values should be replaced. +#' @param pt.size The size of the individual datapoints in the plot. Set to 0 to get a clean violin plot. +#' @param sub Subtitle of the plot. Default: feature by ident. +#' @param suffix An optional string to append to the title of the plot. +#' @param suffix.2.title A logical indicating whether to append the suffix to the plot title. +#' @param logY A logical indicating whether to use a logarithmic scale for the y-axis. +#' @param hline A numeric or logical value; if numeric, the value where a horizontal line should be drawn. +#' @param caption A character string or logical for the plot caption. If FALSE, no caption is displayed. +#' @param ylab Y-axis label. Default: "Expression". +#' @param ylimit A numeric vector specifying the limits of the y-axis. +#' @param legend Show legend; Default: opposite of `label`. +#' @param legend.pos Position of legend; Default: 'NULL'. +#' @param legend.title Title of legend; Default: 'split.by'. +#' @param show_plot A logical indicating whether to display the plot. +#' @param grid A logical indicating whether to display grid lines. +#' @param w Width of the plot. +#' @param h Height of the plot. +#' @param ... Additional arguments passed to `VlnPlot`. +#' +#' @return A ggplot object representing the violin plot. +#' +#' @examples +#' # Assuming `seurat_obj` is a valid Seurat object +#' qSeuViolin(obj = seurat_obj, feature = "nFeature_RNA") +#' +#' @export +qSeuViolin <- function( + obj, + feature = "nFeature_RNA", + ident = GetNamedClusteringRuns(obj = obj, v = F)[1], + split.by = NULL, + colors = NULL, + clip.outliers = TRUE, + replace.na = FALSE, + pt.size = 0.5, + sub = NULL, + suffix = NULL, + suffix.2.title = FALSE, + caption = .parseKeyParams(obj), + logY = TRUE, + hline = FALSE, + ylab = "Expression", + ylimit = NULL, + legend = TRUE, + legend.pos = NULL, # c("top", "bottom", "left", "right", "none")[2], + legend.title = NULL, + show_plot = TRUE, + grid = TRUE, + w = NULL, h = 7, + ...) { + # + stopifnot( + "Seurat" %in% class(obj), # object must be a Seurat object + is.logical(logY), # logY must be logical (TRUE or FALSE) + is.logical(hline) || is.numeric(hline), # hline must be logical or numeric + is.logical(caption) || is.character(caption), # caption must be logical or character + is.logical(suffix.2.title), # suffix.2.title must be logical + is.character(split.by) | is.null(split.by), # split.by must be a character or NULL + split.by %in% colnames(obj@meta.data), + is.character(ident), + ident %in% colnames(obj@meta.data), + is.character(feature), + feature %in% colnames(obj@meta.data) || feature %in% rownames(obj) + ) + + if(exists('idents')) warning("Use arg. ident instead of idents!\n", immediate. = TRUE) + if(exists('features')) warning("Use arg. feature instead of features!\n", immediate. = TRUE) + + split_col <- unlist(obj[[ident]]) + if(is.null(w)) w <- ceiling(length(unique(split_col))/6) + 6 + message("Plot width: ", w) + + + ttl <- if (suffix.2.title) { + paste(feature, "|", suffix) + } else { + as.character(feature) + } + subt <- paste(feature, "- by -", ident) + if(!is.null(sub)) subt <- paste0(subt, "\n", sub) + + if (replace.na) { + warning("NA's are not, but zeros are displayed on the plot. Avoid replace.na when possible", immediate. = TRUE) + obj@meta.data[[feature]] <- na.replace(x = obj@meta.data[[feature]], replace = 0) + } + + # browser() + if (clip.outliers) { + warning("Outliers are clipped at percentiles 0.5% and 99.5%", immediate. = TRUE) + obj@meta.data[[feature]] <- CodeAndRoll2::clip.outliers.at.percentile( + x = obj@meta.data[[feature]], percentiles = c(.005, .995) ) + } + + if (!is.null(colors)) { + stopifnot(colors %in% colnames(obj@meta.data)) + col_long <- as.factor(unlist(obj[[colors]])) + colors <- as.factor.numeric(sapply(split(col_long, split_col), unique)) + stopifnot("colors cannot be uniquely split by ident. Set colors = NULL!" = length(colors) == length(unique(split_col))) + } + + p.obj <- Seurat::VlnPlot(object = obj, + features = feature, group.by = ident, + cols = colors, split.by = split.by, + pt.size = pt.size, ...) + + theme(axis.title.x = element_blank()) + + labs(y = ylab) + + ggtitle(label = ttl, subtitle = subt ) + + if (!legend) p.obj <- p.obj + NoLegend() + if (!is.null(legend.title)) p.obj <- p.obj + guides(fill = guide_legend(legend.title)) else NULL + if (grid) p.obj <- p.obj + ggpubr::grids(axis = "y") + + # Add additional customization, if needed.. + if (!is.null(ylimit)) p.obj <- p.obj + ylim(ylimit[1], ylimit[2]) + if (logY) p.obj <- p.obj + ggplot2::scale_y_log10() + if (hline[1]) p.obj <- p.obj + ggplot2::geom_hline(yintercept = hline) + if (!isFALSE(caption)) p.obj <- p.obj + ggplot2::labs(caption = caption) + if (!is.null(legend.pos)) p.obj <- p.obj + theme(legend.position = legend.pos) + + # Save the plot. + TTL <- ppp(as.character(feature), "by", ident, suffix) + qqSave(p.obj, title = TTL, suffix = ppp(flag.nameiftrue(logY), "violin"), w = w, h = h, limitsize = FALSE) + if (show_plot) p.obj +} + + + + +# _________________________________________________________________________________________________ +# Plotting 2D UMAPs, etc. ______________________________ ---- +# _________________________________________________________________________________________________ + +# _________________________________________________________________________________________________ +#' @title Quick UMAP Visualization of Gene Expression and automatically save the plot +#' +#' @description Generates a UMAP visualization for a specific feature from a Seurat object, and +#' automatically saves it. Offers options for custom titles, subtitles, saving, and more. Assumes +#' default options for custom titles, subtitles, saving, and more. +#' +#' @param feature Feature to visualize on the UMAP; Default: 'TOP2A'. +#' @param obj Seurat object containing single-cell RNA-seq data; Default: `combined.obj`. +#' @param title Title of the plot; Default: `feature`. +#' @param sub Subtitle of the plot; Default: NULL. +#' @param reduction Dimension reduction technique to be used ('umap', 'tsne', or 'pca'); Default: 'umap'. +#' @param splitby Column in the metadata to split the cells by; Default: NULL. +#' @param prefix Prefix added before the filename; Default: NULL. +#' @param suffix Suffix added to the end of the filename; Default: `sub`. +#' @param save.plot If TRUE, the plot is saved into a file; Default: `TRUE`.. +#' @param PNG If TRUE, the file is saved as a .png; Default: `TRUE`.. +#' @param h Height of the plot in inches; Default: 7. +#' @param w Width of the plot in inches; Default: NULL. +#' @param nr.cols Number of columns to combine multiple feature plots, ignored if `split.by` is not NULL; Default: NULL. +#' @param assay Which assay to use ('RNA' or 'integrated'); Default: 'RNA'. +#' @param axes If TRUE, axes are shown on the plot; Default: `FALSE`.. +#' @param aspect.ratio Ratio of height to width. If TRUE, the ratio is fixed at 0.6; Default: `FALSE`.. +#' @param HGNC.lookup If TRUE, HGNC gene symbol lookup is performed; Default: `TRUE`.. +#' @param make.uppercase If TRUE, feature names are converted to uppercase; Default: `FALSE`.. +#' @param qlow Lower quantile for the color scale; Default: 'q10'. +#' @param qhigh Upper quantile for the color scale; Default: 'q90'. +#' @param check_for_2D If TRUE, checks if UMAP is 2 dimensional; Default: `TRUE`.. +#' @param caption Adds a caption to the ggplot object; Default: dynamically generated from `obj`. +#' @param ... Additional parameters to pass to the internally called functions. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' qUMAP(feature = "nFeature_RNA", obj = yourSeuratObject) +#' qUMAP(feature = "TOP2A", obj = yourSeuratObject, PNG = FALSE, save.plot = TRUE) +#' } +#' } +#' +#' @export +#' @importFrom Seurat FeaturePlot NoLegend NoAxes +#' @importFrom ggplot2 ggtitle coord_fixed labs +#' +qUMAP <- function( + feature = "TOP2A", obj = combined.obj, + title = feature, sub = NULL, + reduction = "umap", splitby = NULL, + prefix = NULL, + suffix = make.names(sub), + save.plot = MarkdownHelpers::TRUE.unless("b.save.wplots", v = FALSE), + PNG = TRUE, + h = 7, w = NULL, nr.cols = NULL, + assay = c("RNA", "integrated")[1], + axes = FALSE, + aspect.ratio = c(FALSE, 0.6)[1], + HGNC.lookup = TRUE, + make.uppercase = FALSE, + check_for_2D = TRUE, + qlow = "q10", qhigh = "q90", + caption = .parseBasicObjStats(obj, simple = TRUE), + ...) { + # + stopifnot(is(obj) == "Seurat") + message("Feature: ", feature, " | Assay: ", assay) + + if (feature %in% colnames(obj@meta.data)) { + message(paste("...found in @meta.data.")) + stopifnot(is.numeric(obj@meta.data[, feature])) + } + + if (check_for_2D) { + umap_dims <- ncol(obj@reductions[[reduction]]@cell.embeddings) + if (umap_dims != 2) warning(">>> UMAP is not 2 dimensional! \n Check obj@reductions[[reduction]]@cell.embeddings") + } + + if (!(feature %in% colnames(obj@meta.data) | feature %in% Features(obj, assay = assay))) { + feature <- check.genes( + list.of.genes = feature, obj = obj, verbose = FALSE, + HGNC.lookup = HGNC.lookup, makeuppercase = make.uppercase + ) + } + + DefaultAssay(obj) <- assay + gg.obj <- Seurat::FeaturePlot(obj, + features = feature, + reduction = reduction, + min.cutoff = qlow, max.cutoff = qhigh, + ncol = nr.cols, + split.by = splitby, + ... + ) + + ggtitle(label = title, subtitle = sub) + + if (!axes) NoAxes() else NULL + + if (aspect.ratio) gg.obj <- gg.obj + ggplot2::coord_fixed(ratio = aspect.ratio) + if (!isFALSE(caption)) gg.obj <- gg.obj + ggplot2::labs(caption = caption) + + if (save.plot) { + fname <- ww.FnP_parser(sppp(prefix, toupper(reduction), feature, assay, paste0(ncol(obj),"c"), suffix), if (PNG) "png" else "pdf") + try(save_plot(filename = fname, plot = gg.obj, base_height = h, base_width = w)) # , ncol = 1, nrow = 1 + } + return(gg.obj) +} + + + +# _________________________________________________________________________________________________ +#' @title clUMAP - Quick Visualization of Clustering Results with UMAP and automatically save the plot +#' +#' @description Generates a UMAP visualization based on clustering results from a Seurat object, +#' and automatically saves it. Offers options for custom titles, subtitles, saving, and more. Assumes +#' default options for custom titles, subtitles, saving, and more. +#' +#' @param ident Cluster identity for visualization; Default: 'integrated_snn_res.0.5'. +#' @param obj Seurat object containing single-cell data; Default: `combined.obj`. +#' @param reduction Dimension reduction method ('umap', 'tsne', 'pca'); Default: 'umap'. +#' @param splitby Metadata column to split cells by; optional; Default: NULL. +#' @param title Main title of the plot; Default: `ident`. +#' @param sub Subtitle of the plot; optional; Default: NULL. +#' @param prefix Prefix for saved filename; optional; Default: NULL. +#' @param suffix Suffix for saved filename; defaults to plot subtitle; Default: NULL. +#' @param caption Plot caption; optional; Default: dynamically generated from `obj`. +#' @param label.cex Size of cluster labels; Default: 7. +#' @param h Height of plot in inches; Default: 7. +#' @param w Width of plot in inches; optional; Default: NULL. +#' @param nr.cols Number of columns for facet wrap if `splitby` is not NULL; Default: NULL. +#' @param plotname Custom plot name for saving; Default: dynamically generated from `reduction` and `ident`. +#' @param cols Custom color vector for clusters; optional; Default: NULL. +#' @param palette Color palette for generating cluster colors; Default: 'glasbey'. +#' @param highlight.clusters Specific clusters to be highlighted; optional; Default: NULL. +#' @param cells.highlight Specific cells to be highlighted; optional; Default: NULL. +#' @param label Show cluster labels; Default: `TRUE`.. +#' @param repel Repel labels to avoid overlap; Default: `TRUE`.. +#' @param legend Show legend; Default: opposite of `label`. +#' @param legend.pos Position of legend; Default: 'NULL'. +#' @param axes Show axes; Default: `FALSE`.. +#' @param aspect.ratio Fixed aspect ratio for the plot; Default: `TRUE`.. +#' @param MaxCategThrHP Maximum number of categories before simplification; Default: 200. +#' @param save.plot Save plot to file; Default: `TRUE`.. +#' @param PNG Save as PNG (TRUE) or PDF (FALSE); Default: `TRUE`.. +#' @param check_for_2D Ensure UMAP is 2D; Default: `TRUE`.. +#' @param ... Additional parameters for `DimPlot`. +#' +#' @examples +#' \dontrun{ +#' clUMAP(ident = "integrated_snn_res.0.5", obj = yourSeuratObj) +#' clUMAP(ident = "integrated_snn_res.0.5", obj = yourSeuratObj, cols = RColorBrewer::brewer.pal(8, "Dark2")) +#' } +#' +#' @importFrom ggplot2 ggtitle labs coord_fixed ggsave +#' @importFrom Seurat DimPlot NoLegend NoAxes +#' @importFrom RColorBrewer brewer.pal +#' +#' @export +clUMAP <- function( + ident = NULL, + obj = combined.obj, + title = ident, + sub = NULL, + prefix = NULL, + suffix = make.names(sub), + caption = .parseBasicObjStats(obj, simple = TRUE), # try(.parseKeyParams(obj = obj), silent = T), + reduction = "umap", splitby = NULL, + label.cex = 7, + h = 7, w = NULL, + nr.cols = NULL, + plotname = ppp(toupper(reduction), ident), + cols = NULL, + palette = c("alphabet", "alphabet2", "glasbey", "polychrome", "stepped")[3], + max.cols.for.std.palette = 7, + highlight.clusters = NULL, cells.highlight = NULL, + cols.highlight = "red", + sizes.highlight = 1, + label = TRUE, repel = TRUE, + legend = !label, + legend.pos = NULL, # c("top", "bottom", "left", "right", "none")[2], + MaxCategThrHP = 200, + axes = NULL, + aspect.ratio = c(FALSE, 0.6)[2], + save.plot = MarkdownHelpers::TRUE.unless("b.save.wplots", v = FALSE), + PNG = TRUE, + check_for_2D = TRUE, + ...) { + # + + stopifnot( + is.character(caption) | is.null(caption), + is.logical(save.plot), + is.character(suffix) | is.null(suffix) + ) + tictoc::tic() + + if (is.null(ident)) { + ident <- GetNamedClusteringRuns(obj, v = F)[1] + message("Identity not provided. Plotting: ", ident) + } + + if (check_for_2D) { + umap_dims <- ncol(obj@reductions[[reduction]]@cell.embeddings) + if (umap_dims != 2) warning(">>> UMAP is not 2 dimensional! \n Check obj@reductions[[reduction]]@cell.embeddings") + } + + IdentFound <- (ident %in% colnames(obj@meta.data)) + if (!IdentFound) { + ident <- GetClusteringRuns(obj = obj, pat = "_res.*[0,1]\\.[0-9]$")[1] + iprint("Identity not found. Plotting", ident) + } + identity <- obj[[ident]] + NtCategs <- length(unique(identity[, 1])) + if (NtCategs > 1000) warning("More than 1000 levels! qUMAP?", immediate. = TRUE) + + + # Highlight specific clusters if provided _____________________________________________________ + if (!missing(highlight.clusters)) { + if (!(all(highlight.clusters %in% identity[, 1]))) { + MSG <- paste( + "Some clusters not found in the object! Missing:", + kppc(setdiff(highlight.clusters, unique(identity[, 1]))), "\nFrom:\n", + kppc(sort(unique(identity[, 1]))) + ) + warning(MSG, immediate. = TRUE) + } + + idx.ok <- identity[, 1] %in% highlight.clusters + stopifnot("minimum 10 cells are needed" = sum(idx.ok) > 10) + + highlight.these <- rownames(identity)[idx.ok] + PCT <- percentage_formatter(length(highlight.these) / ncol(obj), suffix = "or") + + # Annotation to subtitle _________________________________________________________________ + sub2 <- paste(PCT, length(highlight.these), "cells in", ident, "are highlighted") + sub3 <- paste("Highlighted clusters:", kppc(highlight.clusters)) + sub <- if (is.null(sub)) pnl(sub2, sub3) else pnl(sub, sub2, sub3) + + # title <- kpipe(ident, ) + } else { + highlight.these <- NULL + } + + # Message if highlighting cells _____________________________________________________________ + if (!missing(cells.highlight)) { + highlight.these <- cells.highlight + message("Highlighting ", length(highlight.these), " cells, e.g.: ", head(highlight.these)) + message("cols.highlight: ", cols.highlight ," | sizes.highlight: ", sizes.highlight) + } + + if (is.null(cols)) { + cols <- if (NtCategs > max.cols.for.std.palette) { + getDiscretePaletteObj( + ident.used = ident, palette.used = palette, + obj = obj, show.colors = FALSE + ) + } + } + + # if (F) cols <- adjustcolor(cols, alpha.f = alpha) + + if (!is.null(highlight.these)) { + cols <- "lightgrey" + } + + # Plot _________________________________________________________________________________________ + if (NtCategs > MaxCategThrHP) { + iprint("Too many categories (", NtCategs, ") in ", ident, "- use qUMAP for continous variables.") + } else { + if (length(unique(identity)) < MaxCategThrHP) { + gg.obj <- + Seurat::DimPlot( + object = obj, group.by = ident, + cols = cols, + reduction = reduction, split.by = splitby, + ncol = nr.cols, + cells.highlight = highlight.these, + cols.highlight = cols.highlight, + sizes.highlight = sizes.highlight, + label = label, repel = repel, label.size = label.cex, + ...) + + ggtitle(label = title, subtitle = sub) + + if (!legend) NoLegend() else NULL + } + + # Additional options ___________________________________________________ + if (is.null(axes)) gg.obj <- gg.obj + Seurat::NoAxes() + if (!is.null(caption)) gg.obj <- gg.obj + ggplot2::labs(caption = caption) + if (!is.null(legend.pos)) gg.obj <- gg.obj + ggplot2::theme(legend.position = legend.pos) + if (aspect.ratio) gg.obj <- gg.obj + ggplot2::coord_fixed(ratio = aspect.ratio) + if (legend) suffix <- paste0(suffix, ".lgnd") + + # Save plot ___________________________________________________________ + if (save.plot) { + pname <- sppp(prefix, plotname, paste0(ncol(obj),"c"), suffix, sppp(highlight.clusters)) + fname <- ww.FnP_parser(pname, if (PNG) "png" else "pdf") + try(save_plot(filename = fname, plot = gg.obj, base_height = h, base_width = w)) # , ncol = 1, nrow = 1 + } + tictoc::toc() + return(gg.obj) + } # if not too many categories +} + + + + + + +# _________________________________________________________________________________________________ +#' @title Highlight Selected Clusters on UMAP +#' +#' @description Generates a UMAP plot from a Seurat object with specified clusters highlighted. +#' It saves the resulting UMAP plot directly to the current working directory. +#' +#' @param obj Seurat object to be visualized; Default: `combined.obj`. +#' @param COI Vector of cluster IDs to highlight on the UMAP plot; +#' Default: `c("0", "2", "4")`. +#' @param ident Name of the metadata column containing cluster IDs; +#' Default: `GetClusteringRuns()[1]`. +#' @param h Height of the plot; Default: `7`. +#' @param w Width of the plot; Default: `5`. +#' @param show_plot Logical; if `TRUE`, the plot will be displayed in the RStudio viewer; +#' Default: `TRUE`. +#' @param ... Additional arguments to be passed to the `DimPlot` function.#' +#' +#' @return Saves a UMAP plot highlighting specified clusters to the current working directory. +#' The function itself does not return an object within R. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # GetClusteringRuns()[1] "integrated_snn_res.0.1" +#' umapHiLightSel(obj = combined.obj, COI = c("0", "1"), ident = GetClusteringRuns()[1]) +#' } +#' } +#' +#' @seealso \code{\link[Seurat]{DimPlot}} +#' +#' @export +#' @importFrom Seurat DimPlot +#' @importFrom ggplot2 ggsave +umapHiLightSel <- function(obj = combined.obj, + COI = c("0", "2", "4"), + ident = GetClusteringRuns()[1], + h = 7, w = 5, + show_plot = T, + ...) { + stopifnot(is(obj, "Seurat"), + "Ident no found the object!" =ident %in% colnames(obj@meta.data), + "Not all clusters in COI are found the object!" = all(COI %in% unique(obj@meta.data[[ident]])) + ) + + cellsSel <- getCellIDs.from.meta(ident = ident, ident_values = COI, obj = obj) + pl <- Seurat::DimPlot(obj, + reduction = "umap", + group.by = ident, + label = TRUE, + cells.highlight = cellsSel, + ... + ) + if(show_plot) print(pl) + + ggplot2::ggsave(filename = extPNG(kollapse("cells", COI, collapseby = ".")), + height = h, width = w) +} + + + +# _________________________________________________________________________________________________ +#' @title DimPlot.ClusterNames +#' +#' @description Plot UMAP with Cluster names. +#' @param obj Seurat object, Default: combined.obj +#' @param ident identity used, Default: 'cl.names.top.gene.res.0.5' +#' @param reduction UMAP, tSNE, or PCA (Dim. reduction to use), Default: 'umap' +#' @param title Title of the plot, Default: ident +#' @param ... Pass any other parameter to the internally called functions (most of them should work). +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' DimPlot.ClusterNames(obj = combined.obj) +#' } +#' } +#' @export +DimPlot.ClusterNames <- function( + obj = combined.obj, + ident = GetNamedClusteringRuns(obj = obj, v = F)[1], + reduction = "umap", + title = ident, + ...) { + # + Seurat::DimPlot( + object = obj, reduction = reduction, group.by = ident, + label = TRUE, repel = TRUE, ... + ) + NoLegend() + ggtitle(title) +} + + + + +# _________________________________________________________________________________________________ +# Multiplex 2D UMAPs, etc. ______________________________ ---- + + +# _________________________________________________________________________________________________ +#' @title multiFeaturePlot.A4 +#' +#' @description Save multiple FeaturePlots, as jpeg, on A4 for each gene, which are stored as a list of gene names. +#' @param list.of.genes List of gene names for which the plots are to be generated. No default. +#' @param obj Seurat object, Default: combined.obj +#' @param subdir Should plots be saved in a sub-directory? Default: `TRUE`. +#' @param foldername Folder name to save the generated plots. Default: The name of the list of genes. +#' @param subtitle.from.names Should the subtitle be extracted from the names of the gene symbols, +#' eg: `c("Astrocytes" = "AQP4")` ? Default: `TRUE`. +#' @param plot.reduction Dimension reduction technique to use for plots. Default: 'umap' +#' @param intersectionAssay The assay to intersect with, either 'RNA' or 'integrated'. Default: 'RNA' +#' @param layout Layout orientation of the plot. Default: 'wide' +#' @param colors Vector of colors to be used in the plot. Default: c("grey", "red") +#' @param nr.Col Number of columns in the plot grid. Default: 2 +#' @param nr.Row Number of rows in the plot grid. Default: 4 +#' @param cex Point size in the plot. Default: round(0.1/(nr.Col * nr.Row), digits = 2) +#' @param gene.min.exp Minimum gene expression level for plotting. Default: 'q01' +#' @param gene.max.exp Maximum gene expression level for plotting. Default: 'q99' +#' @param prefix Prefix for the plot filenames. Default: NULL +#' @param suffix Suffix for the plot filenames. Default: NULL +#' @param background_col Background color of the plots. Default: "white" +#' @param saveGeneList Should the list of genes be saved? Default: `FALSE`. +#' @param w Width of the plot. Default: 8.27 +#' @param h Height of the plot. Default: 11.69 +#' @param scaling Scaling factor for plot size. Default: 1 +#' @param aspect.ratio Should the aspect ratio be fixed? Default: Yes, at 0.6 +#' @param format Format to save the plot file. Default: 'jpg' +#' @param ... Pass any other parameter to the internally called functions (most of them should work). +#' @seealso +#' \code{\link[tictoc]{tic}} +#' \code{\link[cowplot]{plot_grid}} +#' @importFrom tictoc tic toc +#' @importFrom cowplot plot_grid +#' @importFrom MarkdownReports create_set_OutDir +#' +#' @export +multiFeaturePlot.A4 <- function( + list.of.genes, + obj = combined.obj, + subdir = TRUE, + foldername = substitute(list.of.genes), + subtitle.from.names = TRUE, + plot.reduction = "umap", + intersectionAssay = c("RNA", "integrated")[1], + layout = c("tall", "wide", FALSE)[2], + colors = c("grey", "red"), + nr.Col = 2, nr.Row = 4, + raster = if (ncol(obj) > 1e5) TRUE else FALSE, + cex = round(0.1 / (nr.Col * nr.Row), digits = 2), + cex.min = if (raster) TRUE else FALSE, + gene.min.exp = "q01", gene.max.exp = "q99", + prefix = NULL, suffix = NULL, + background_col = "white", + aspect.ratio = c(FALSE, 0.6)[2], + saveGeneList = FALSE, + w = 8.27, h = 11.69, scaling = 1, + format = c("jpg", "pdf", "png")[1], + ...) { + tictoc::tic() + ParentDir <- OutDir + if (is.null(foldername)) foldername <- "genes" + final.foldername <- FixPlotName(paste0(foldername, "-", plot.reduction, suffix)) + if (subdir) create_set_SubDir(final.foldername, "/", verbose = FALSE) + + if(is.null(names(list.of.genes))) subtitle.from.names <- FALSE + + list.of.genes.found <- check.genes( + list.of.genes = list.of.genes, obj = obj, + assay.slot = intersectionAssay, makeuppercase = FALSE + ) + if(subtitle.from.names) names(list.of.genes.found) <- as.character(flip_value2name(list.of.genes)[list.of.genes.found]) + DefaultAssay(obj) <- intersectionAssay + + if (!is.null(cex.min)) cex <- max(cex.min, cex) + + if (layout == "tall") { + w <- 8.27 * scaling + h <- 11.69 * scaling + nr.Col <- 2 + nr.Row <- 4 + print("layout active, nr.Col ignored.") + } + if (layout == "wide") { + w <- 11.69 * scaling + h <- 8.27 * scaling + nr.Col <- 2 + nr.Row <- 2 + print("layout active, nr.Col ignored.") + } + + lsG <- CodeAndRoll2::split_vec_to_list_by_N(1:length(list.of.genes.found), by = nr.Row * nr.Col) + for (i in 1:length(lsG)) { + genes <- list.of.genes.found[lsG[[i]]] + iprint(i, genes) + plotname <- kpp(c(prefix, plot.reduction, i, genes, suffix, format)) + + plot.list <- Seurat::FeaturePlot( + object = obj, features = genes, reduction = plot.reduction, combine = FALSE, + ncol = nr.Col, cols = colors, raster = raster, + min.cutoff = gene.min.exp, max.cutoff = gene.max.exp, + pt.size = cex, ... + ) + + # Remove the legend and axes + for (j in 1:length(plot.list)) { + plot.list[[j]] <- plot.list[[j]] + NoLegend() + NoAxes() + if (aspect.ratio) plot.list[[j]] <- plot.list[[j]] + ggplot2::coord_fixed(ratio = aspect.ratio) + if (subtitle.from.names) plot.list[[j]] <- plot.list[[j]] + ggplot2::ggtitle(label = genes[j], subtitle = names(genes)[j] ) + } + # browser() + + pltGrid <- cowplot::plot_grid(plotlist = plot.list, ncol = nr.Col, nrow = nr.Row) + # cowplot::ggsave2(filename = plotname, width = w, height = h, bg = background_col, plot = pltGrid) + cowplot::save_plot( + plot = pltGrid, filename = plotname, + base_width = w, base_height = h, + bg = background_col + ) + } + + if (subdir) MarkdownReports::create_set_OutDir(ParentDir, verbose = FALSE) + if (saveGeneList) { + if (is.null(obj@misc$gene.lists)) obj@misc$gene.lists <- list() + obj@misc$gene.lists[[substitute(list.of.genes)]] <- list.of.genes.found + print("Genes saved under: obj@misc$gene.lists") + return(obj) + } + tictoc::toc() +} + + + +# ____________________________________________________________________________________ +#' @title Generate Cluster Highlight UMAPs compiled into A4 pages +#' +#' @description This function generates and saves cluster highlight plots for both single and multiple +#' clusters using UMAP or other dimensionality reduction techniques. It supports saving plots in various +#' formats and allows customization of plot appearance and layout. +#' +#' @param ident The name of the metadata column in the Seurat object `obj` to use for identifying clusters. +#' @param obj A Seurat object combining multiple datasets. Default: `combined.obj`. +#' @param foldername Name of the folder to save the plots in. Default: Value of `ident`. +#' @param plot.reduction The dimensionality reduction technique to use for the plots. Default: `"umap"`. +#' @param intersectionAssay The assay to use when calculating intersections. Default: `"RNA"`. +#' @param layout Plot layout, can be `"tall"`, `"wide"`, or `FALSE` for no specific layout. Default: `"wide"`. +#' @param colors A vector of colors to use for non-highlighted and highlighted clusters. Default: `c("grey", "red")`. +#' @param nr.Col Number of columns in the plot grid. Default: 2. +#' @param nr.Row Number of rows in the plot grid. Default: 4. +#' @param cex Size of the text in the plot, calculated based on the number of rows and columns. Default: Calculated value. +#' @param subdir Logical flag indicating whether to create a subdirectory for the plots. Default: `TRUE`. +#' @param prefix Optional prefix for the plot file names. Default: `NULL`. +#' @param suffix Optional suffix for the plot file names. Default: `NULL`. +#' @param background_col Background color of the plots. Default: `"white"`. +#' @param aspect.ratio Aspect ratio of the plots, can be `FALSE` for default ratio or a numeric value. Default: 0.6. +#' @param saveGeneList Logical flag indicating whether to save the list of genes used in the plots. Default: `FALSE`. +#' @param w Width of the plots, in inches. Default: `8.27`. +#' @param h Height of the plots, in inches. Default: `11.69`. +#' @param scaling Scaling factor for adjusting the size of the plots. Default: 1. +#' @param format Format to save the plots in, can be `"jpg"`, `"pdf"`, or `"png"`. Default: `"jpg"`. +#' @param ... Additional arguments passed to lower-level plotting functions. +#' +#' @return Invisible. This function primarily saves plots to files. +#' @examples +#' multiSingleClusterHighlightPlots.A4(ident = "cluster_id", obj = yourSeuratObject) +#' +#' @importFrom ggplot2 ggplot geom_point +#' @importFrom cowplot plot_grid ggsave2 +#' @importFrom tictoc tic toc +#' @importFrom MarkdownReports create_set_OutDir +#' +#' @export +multiSingleClusterHighlightPlots.A4 <- function( + obj = combined.obj, + ident = GetClusteringRuns(obj)[1], + foldername = ident, + plot.reduction = "umap", + intersectionAssay = DefaultAssay(combined.obj), # c("RNA", "integrated")[1], + layout = c("tall", "wide", FALSE)[2], + colors = c("grey", "red"), + nr.Col = 2, nr.Row = 4, + cex = round(0.1 / (nr.Col * nr.Row), digits = 2), + subdir = TRUE, + prefix = NULL, suffix = NULL, + background_col = "white", + aspect.ratio = c(FALSE, 0.6)[2], + saveGeneList = FALSE, + w = 8.27, h = 11.69, scaling = 1, + format = c("jpg", "pdf", "png")[1], + ...) { + message(" > Running multiSingleClusterHighlightPlots.A4...") + + NrCellsPerCluster <- sort(table(obj[[ident]]), decreasing = TRUE) + stopifnot("Some clusters too small (<20 cells). See: table(obj[[ident]]) | Try: removeResidualSmallClusters()" = + all(NrCellsPerCluster > 20)) + + tictoc::tic() + ParentDir <- OutDir + if (is.null(foldername)) foldername <- "clusters" + if (subdir) MarkdownReports::create_set_SubDir(paste0(foldername, "-", plot.reduction), "/") + + clusters <- unique(obj@meta.data[[ident]]) + + DefaultAssay(obj) <- intersectionAssay + + # Adjust plot dimensions and grid layout based on specified layout + if (layout == "tall") { + w <- 8.27 * scaling + h <- 11.69 * scaling + nr.Col <- 2 + nr.Row <- 4 + message("tall layout active, nr.Col ignored.") + } + + if (layout == "wide") { + w <- 11.69 * scaling + h <- 8.27 * scaling + nr.Col <- 2 + nr.Row <- 2 + message("wide layout active, nr.Col ignored.") + } + + # Split clusters into lists for plotting + ls.Clust <- CodeAndRoll2::split_vec_to_list_by_N(1:length(clusters), by = nr.Row * nr.Col) + for (i in 1:length(ls.Clust)) { + clusters_on_this_page <- clusters[ls.Clust[[i]]] + iprint("page:", i, "| clusters", kppc(clusters_on_this_page)) + (plotname <- kpp(c(prefix, plot.reduction, i, "clusters", ls.Clust[[i]], suffix, format))) + + plot.list <- list() + for (j in seq(clusters_on_this_page)) { + cl <- clusters_on_this_page[j] + message(cl) + plot.list[[j]] <- clUMAP( + ident = ident, obj = obj, + highlight.clusters = cl, label = FALSE, legend = FALSE, save.plot = FALSE, + plotname = plotname, cols = colors, h = h, w = w, ... + ) + } # for j + + # Customize plot appearance + for (j in 1:length(plot.list)) { + plot.list[[j]] <- plot.list[[j]] + NoLegend() + NoAxes() + if (aspect.ratio) { + plot.list[[j]] <- plot.list[[j]] + + ggplot2::coord_fixed(ratio = aspect.ratio) + } + } # for j2 + + # Save plots + pltGrid <- cowplot::plot_grid(plotlist = plot.list, ncol = nr.Col, nrow = nr.Row) + cowplot::ggsave2(filename = plotname, width = w, height = h, bg = background_col, plot = pltGrid) + } # for ls.Clust + + if (subdir) MarkdownReports::create_set_OutDir(ParentDir) + tictoc::toc() +} + + + + +# _________________________________________________________________________________________________ +#' @title Quick Clustering UMAPs on A4 Page +#' +#' @description Generates and arranges UMAP plots for up to four specified clustering resolutions +#' from a Seurat object onto an A4 page, facilitating comparative visualization. +#' +#' @param obj Seurat object to visualize; Default: `combined.obj`. +#' @param idents Vector of clustering resolution identifiers to plot; +#' dynamically defaults to the first 4 found by `GetClusteringRuns`. +#' @param prefix Prefix for plot titles; Default: "Clustering.UMAP.Res". +#' @param suffix Suffix for plot titles; Default: "". +#' @param title Custom title for the composite plot; dynamically generated from `prefix`, `idents`, and `suffix`. +#' @param nrow Number of rows in the plot grid; Default: 2. +#' @param ncol Number of columns in the plot grid; Default: 2. +#' @param w Width of the plot; Default: 11.69. +#' @param h Height of the plot; Default: 8.27. +#' @param ... Additional parameters for individual UMAP plots. +#' +#' @examples +#' \dontrun{ +#' qClusteringUMAPS() +#' } +#' +#' @export +#' @importFrom Seurat NoAxes +#' @importFrom ggExpress qA4_grid_plot +qClusteringUMAPS <- function( + obj = combined.obj, + idents = na.omit.strip(GetClusteringRuns(obj)[1:4]), + prefix = "Clustering.UMAP.Res", + suffix = "", + nrow = 2, ncol = 2, + w = 11.69, h = 8.27, + title = sppu( + prefix, + as.numeric(stringr::str_extract(idents, "\\d+\\.\\d+$")), + suffix + ), + ...) { + message(" > Running qClusteringUMAPS...") + + # Check that the QC markers are in the object + idents.found <- intersect(idents, colnames(obj@meta.data)) + n.found <- length(idents.found) + stopifnot("None of the idents found" = n.found > 0) + message(kppws(n.found, " found of ", idents)) + + if(n.found >5) { + idents.found <- idents.found[1:4] + message("Only the first 4 idents will be plotted: ", idents.found) + } + + px <- list( + "A" = clUMAP(ident = idents[1], save.plot = FALSE, obj = obj, caption = NULL, ...) + NoAxes(), + "B" = clUMAP(ident = idents[2], save.plot = FALSE, obj = obj, caption = NULL, ...) + NoAxes(), + "C" = clUMAP(ident = idents[3], save.plot = FALSE, obj = obj, caption = NULL, ...) + NoAxes(), + "D" = clUMAP(ident = idents[4], save.plot = FALSE, obj = obj, caption = NULL, ...) + NoAxes() + ) + + ggExpress::qA4_grid_plot( + plot_list = px, + plotname = title, + w = w, h = h, + nrow = nrow, ncol = ncol + ) +} + +# _________________________________________________________________________________________________ +#' @title Quickly Draw 4 Gene Expression UMAPs on an A4 Page +#' +#' @description Generates and arranges UMAP plots for up to four specified gene expressions +#' from a Seurat object onto an A4 page, facilitating comparative visualization. +#' +#' @param obj Seurat object to visualize; Default: `combined.obj`. +#' @param features Vector of gene identifiers to plot; +#' dynamically defaults to the first 4 found by `rownames(obj)`. +#' @param prefix Prefix for plot titles; Default: "Expression.UMAP.Gene". +#' @param suffix Suffix for plot titles; Default: "". +#' @param title Custom title for the composite plot; dynamically generated from `prefix`, `genes`, and `suffix`. +#' @param nrow Number of rows in the plot grid; Default: 2. +#' @param ncol Number of columns in the plot grid; Default: 2. +#' @param w Width of the plot; Default: 11.69. +#' @param h Height of the plot; Default: 8.27. +#' @param ... Additional parameters for individual UMAP plots. +#' +#' @examples +#' \dontrun{ +#' qGeneExpressionUMAPS() +#' } +#' +#' @export +#' @importFrom Seurat NoAxes +#' @importFrom ggExpress qA4_grid_plot +qGeneExpressionUMAPS <- function( + obj = combined.obj, + features = rownames(obj)[1:4], + prefix = "Expression.UMAP.Gene", + suffix = "", + nrow = 2, ncol = 2, + w = 11.69, h = 8.27, + title = paste0(prefix, " ", paste(features, collapse = ", "), " ", suffix), + ...) { + message("Plotting qGeneExpressionUMAPS") + + # Check that the features are in the object + features.found <- intersect(features, c(colnames(obj@meta.data), rownames(obj))) + n.found <- length(features.found) + stopifnot("None of the features found" = n.found > 1, + "Only 4 features are allowed" = n.found <5) + + message(kppws(n.found, "found of", length(features), "features:", features)) + + px <- list( + "A" = qUMAP(feature = features[1], save.plot = FALSE, obj = obj, caption = NULL, ...) + NoAxes(), + "B" = qUMAP(feature = features[2], save.plot = FALSE, obj = obj, caption = NULL, ...) + NoAxes(), + "C" = qUMAP(feature = features[3], save.plot = FALSE, obj = obj, caption = NULL, ...) + NoAxes(), + "D" = qUMAP(feature = features[4], save.plot = FALSE, obj = obj, ...) + NoAxes() + ) + + ggExpress::qA4_grid_plot( + plot_list = px, + plotname = title, + w = w, h = h, + nrow = nrow, ncol = ncol + ) +} + + +# _________________________________________________________________________________________________ +#' @title Plot qUMAPs for Genes in a Folder +#' +#' @description This function plots qUMAPs for a specified set of genes, storing the results in a +#' specified folder. If no folder name is provided, it defaults to using the gene set name. +#' +#' @param genes A vector of gene names to be plotted. +#' @param obj An object containing the UMAP and gene data. Default: combined.obj. +#' @param foldername The name of the folder where the plots will be saved. If NULL, the gene set +#' name is used. Default: NULL. +#' @param intersectionAssay The assay slot to use for intersection. Default: 'RNA'. +#' @param plot.reduction The type of reduction to plot. Default: 'umap'. +#' @param ... Additional arguments passed to plotting and directory creation functions. +#' +#' @return Invisible. The function generates plots and saves them in the specified folder. +#' +#' @examples +#' plotQUMAPsInAFolder( +#' genes = c("Gene1", "Gene2"), obj = combined.obj, +#' foldername = "MyGenePlots", intersectionAssay = "RNA", +#' plot.reduction = "umap" +#' ) +#' +#' @importFrom MarkdownReports create_set_SubDir create_set_OutDir +#' @export + +plotQUMAPsInAFolder <- function(genes, obj = combined.obj, + foldername = NULL, + intersectionAssay = DefaultAssay(obj), + plot.reduction = "umap", + ...) { + message(" > Running plotQUMAPsInAFolder...") + + # Input checks + stopifnot(is.character(genes), + is.null(foldername) || is.character(foldername), + is.character(plot.reduction)) + + ParentDir <- OutDir + if (is.null(foldername)) foldername <- deparse(substitute(genes)) + + MarkdownReports::create_set_SubDir(paste0(foldername, "-", plot.reduction), "/") + + list.of.genes.found <- check.genes( + list.of.genes = genes, obj = obj, + assay.slot = intersectionAssay, makeuppercase = FALSE + ) + + for (g in list.of.genes.found) { + message(g) + qUMAP(g, reduction = plot.reduction, obj = obj, ...) + } + + MarkdownReports::create_set_OutDir(ParentDir) + + invisible() +} + + +# _________________________________________________________________________________________________ +#' @title Plot Top N Differentially Expressed Genes Per Cluster +#' +#' @description Visualizes the top N differentially expressed (DE) genes for each cluster within a +#' specified clustering resolution of a Seurat object, facilitating the exploration of gene +#' expression patterns across clusters. +#' +#' @param obj Seurat object containing single-cell RNA-seq data and clustering information; +#' Default: `combined.obj`. +#' @param cl_res Cluster resolution used to identify distinct clusters for analysis; Default: `res`. +#' @param nrGenes Number of top DE genes to display for each cluster; +#' Default: GetClusteringRuns()[1]. +#' @param order.by Criteria for ranking DE genes within clusters; Default: `"combined.score"`. +#' @param df_markers Data frame or list of DE genes across clusters. If not provided, +#' attempts to retrieve from `obj@misc$df.markers[[paste0("res.", cl_res)]]`; +#' Default: calculated based on `cl_res`. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' PlotTopGenesPerCluster(obj = combined.obj, cl_res = 0.5, nrGenes = 10) +#' } +#' } +#' +#' @export +PlotTopGenesPerCluster <- function( + obj = combined.obj, + cl_res = GetClusteringRuns()[1], + nrGenes = p$"n.markers", + order.by = c("combined.score", "avg_log2FC", "p_val_adj")[1], + df_markers = obj@misc$"df.markers"[[paste0("res.", cl_res)]], + ...) { + message(" > Running PlotTopGenesPerCluster...") + + topX.markers <- GetTopMarkers( + dfDE = df_markers, n = nrGenes, + order.by = order.by + ) + + ls.topMarkers <- splitbyitsnames(topX.markers) + for (i in 1:length(ls.topMarkers)) { + multiFeaturePlot.A4( + list.of.genes = ls.topMarkers[[i]], obj = obj, subdir = T, foldername = ppp('TopGenes.umaps'), + prefix = ppp("DEG.markers.res", cl_res, "cluster", names(ls.topMarkers)[i]) + ) + } +} + +# _________________________________________________________________________________________________ +#' @title Quickly Plot Key QC Markers in Brain Organoids +#' +#' @description Generates and arranges UMAP plots for specified QC features +#' from a Seurat object on an A4 page, facilitating a quick quality control (QC) overview. +#' +#' @param obj Seurat object to visualize; Default: `combined.obj`. +#' @param QC.Features Vector of QC feature names to plot; Default: +#' `c("nFeature_RNA", "percent.ribo", "percent.mito", "nuclear.fraction")`. +#' @param prefix Prefix for plot titles; Default: "QC.markers.4.UMAP". +#' @param suffix Suffix for plot titles; Default: "". +#' @param title Custom title for the composite plot; dynamically generated from `prefix`, +#' `QC.Features`, and `suffix`. +#' @param nrow Number of rows in the plot grid; Default: 2. +#' @param ncol Number of columns in the plot grid; Default: 2. +#' @param ... Additional parameters for individual UMAP plots. +#' +#' @examples +#' \dontrun{ +#' qQC.plots.BrainOrg() +#' } +#' +#' @export +#' @importFrom ggExpress qA4_grid_plot +qQC.plots.BrainOrg <- function( + obj = combined.obj, + QC.Features = c("nFeature_RNA", "percent.ribo", "percent.mito", "nuclear.fraction", "percent.HGA"), + prefix = "QC.markers.4.UMAP", + suffix = "", + title = sppu(prefix, QC.Features, suffix), + nrow = 2, ncol = 2, + ...) { + message(" > Plotting qQC.plots.BrainOrg...") + + # Check that the QC markers are in the object + QC.Features.Found <- intersect(QC.Features, colnames(obj@meta.data)) + n.found <- length(QC.Features.Found) + message(kppws(n.found, " found: ", QC.Features.Found)) + stopifnot(n.found > 1) + + + # Count the number of NAs in specified columns + na_counts <- sapply(X = obj@meta.data[, QC.Features.Found], function(x) sum(is.na(x))) + + # Raise a warning if there are any NAs + if (sum(na_counts) > 0) { + warning(sprintf("There are %d NA values found\n", na_counts), + immediate. = TRUE + ) + } + + px <- list( + "A" = qUMAP(QC.Features.Found[1], save.plot = FALSE, obj = obj, ...) + NoAxes(), + "B" = qUMAP(QC.Features.Found[2], save.plot = FALSE, obj = obj, ...) + NoAxes(), + "C" = qUMAP(QC.Features.Found[3], save.plot = FALSE, obj = obj, ...) + NoAxes(), + "D" = qUMAP(QC.Features.Found[4], save.plot = FALSE, obj = obj, ...) + NoAxes() + ) + + ggExpress::qA4_grid_plot( + plot_list = px, + plotname = title, + w = 11.69, h = 8.27, + nrow = nrow, ncol = ncol + ) +} + + +# _________________________________________________________________________________________________ +#' @title Quickly Plot Key Markers in Brain Organoids +#' +#' @description Generates plots for a predefined or custom set of gene markers within brain organoids, +#' aiding in the quick assessment of their expression across different cells or clusters. +#' +#' @param obj Seurat object for visualization; Default: `combined.obj`. +#' @param custom.genes Logical indicating whether to use a custom set of genes. +#' If FALSE, a predefined list of key brain organoid markers is used; Default: `FALSE`.. +#' @param suffix Suffix for the folder name where the plots are saved; Default: "". +#' +#' @examples +#' \dontrun{ +#' qMarkerCheck.BrainOrg(combined.obj) +#' qMarkerCheck.BrainOrg(combined.obj, custom.genes = c("Gene1", "Gene2")) +#' } +#' +#' @export +#' @importFrom CodeAndRoll2 as_tibble_from_namedVec + +qMarkerCheck.BrainOrg <- function(obj = combined.obj, custom.genes = FALSE, + suffix = "") { + message(" > Running qMarkerCheck.BrainOrg...") + + Signature.Genes.Top16 <- if (!isFALSE(custom.genes)) { + custom.genes + } else { + Signature.Genes.Top16 <- c( + `dl-EN` = "KAZN", `ul-EN` = "SATB2" # dl-EN = deep layer excitatory neuron + , `Immature neurons` = "SLA", Interneurons = "DLX6-AS1", + Interneurons = "ERBB4", Interneurons = "SCGN", + `Intermediate progenitor` = "EOMES" # , `Intermediate progenitor1` = "TAC3" + , `S-phase` = "TOP2A", `G2M-phase` = "H4C3" # formerly: HIST1H4C + , `oRG` = "HOPX" # , `oRG` = "ID4" # oRG outer radial glia + # , Astroglia = "GFAP" + , Astrocyte = "S100B", `Hypoxia/Stress` = "DDIT4", + `Choroid.Plexus` = "TTR", `Low-Quality` = "POLR2A", + `Mesenchyme` = "DCN", Glycolytic = "PDK1" + # , `Choroid.Plexus` = "OTX2", `Mesenchyme` = "DCN" + ) + print(Signature.Genes.Top16) + } + + stopifnot() + + print(CodeAndRoll2::as_tibble_from_namedVec(Signature.Genes.Top16)) + multiFeaturePlot.A4( + obj = obj, list.of.genes = Signature.Genes.Top16, layout = "tall", + foldername = sppp("Signature.Genes.Top16", suffix) + ) +} + + + + + +# _________________________________________________________________________________________________ +#' @title Plot Top Genes +#' +#' @description This function plots the highest expressed genes on UMAPs, saving the plots in a +#' subfolder. It requires the prior execution of `calc.q99.Expression.and.set.all.genes`. +#' +#' @param obj A Seurat object containing the data for plotting. Default: combined.obj. +#' @param n The number of top genes to plot. Default: 32. +#' @param exp.slot The slot in the Seurat object where the expression data is stored. +#' Default: "expr.q99". +#' +#' @details This function identifies the top `n` genes based on expression levels stored in +#' `exp.slot` of the provided Seurat object. It then plots these genes using UMAPs and saves +#' the results in a subfolder named "Highest.Expressed.Genes". +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' PlotTopGenes() +#' } +#' } +#' +#' @export + +PlotTopGenes <- function(obj = combined.obj, n = 32, exp.slot = "expr.q99") { + message("Using obj@misc$", exp.slot) + stopifnot(inherits(obj, "Seurat"), + "Requires calling calc.q99.Expression.and.set.all.genes before. " = + exp.slot %in% names(obj@misc) + ) + + Highest.Expressed.Genes <- names(head(sort(obj@misc[[exp.slot]], decreasing = TRUE), n = n)) + multiFeaturePlot.A4(list.of.genes = Highest.Expressed.Genes, foldername = "Highest.Expressed.Genes") +} + + + + + + +# _________________________________________________________________________________________________ +# Manipulating UMAP and PCA ______________________________ ---- +# _________________________________________________________________________________________________ + +#' @title Flip Reduction Coordinates +#' +#' @description Flips dimensionality reduction coordinates (such as UMAP or tSNE) vertically or +#' horizontally to change the visualization perspective. +#' +#' @param obj Seurat object to modify; Default: `combined.obj`. +#' @param dim Number of dimensions in the reduction to consider; Default: 2. +#' @param reduction Dimension reduction technique to modify ('umap', 'tsne', or 'pca'); Default: 'umap'. +#' @param flip Axis (or axes) to flip; can be 'x', 'y', or 'xy' to flip both; Default: "x". +#' @param FlipReductionBackupToo Boolean indicating whether to also flip coordinates in the backup slot; Default: `TRUE`.. +#' +#' @examples +#' \dontrun{ +#' # Before flipping UMAP coordinates +#' clUMAP() +#' # Flip UMAP coordinates and visualize again +#' combined.obj <- FlipReductionCoordinates(combined.obj) +#' clUMAP() +#' } +#' +#' @export +#' @importFrom Seurat Embeddings +FlipReductionCoordinates <- function( + obj = combined.obj, dim = 2, reduction = "umap", + flip = c("x", "y", "xy", NULL)[1], FlipReductionBackupToo = TRUE) { + coordinates <- Embeddings(obj, reduction = reduction) + stopifnot(ncol(coordinates) == dim) + + if (flip %in% c("x", "xy")) coordinates[, 1] <- coordinates[, 1] * -1 + if (flip %in% c("y", "xy")) coordinates[, 2] <- coordinates[, 2] * -1 + obj@reductions[[reduction]]@cell.embeddings <- coordinates + + if (FlipReductionBackupToo) { + bac.slot <- paste0(reduction, dim, "d") + if (length(obj@misc$reductions.backup[[bac.slot]])) { + obj@misc$reductions.backup[[bac.slot]]@cell.embeddings <- coordinates + iprint(dim, "dimensional", reduction, "backup flipped too.") + } + } + return(obj) +} + + + +# _________________________________________________________________________________________________ +#' @title Relabel Cluster Numbers Along a UMAP (or tSNE) Axis +#' +#' @description Automatically renumbers clusters based on their position along a specified dimension +#' in a UMAP (or tSNE or PCA) plot, potentially enhancing interpretability by ordering clusters. +#' +#' @param obj Seurat object containing clustering and UMAP (or other dimensional reduction) data; +#' Default: `combined.obj`. +#' @param dim Dimension along which to order clusters (1 for the first dimension, typically horizontal); +#' Default: 1. +#' @param swap If TRUE, reverses the ordering direction; Default: `FALSE`.. +#' @param reduction Dimension reduction technique used for cluster positioning ('umap', 'tsne', or 'pca'); +#' Default: 'umap'. +#' @param ident Clustering resolution identifier used to fetch cluster labels from `obj` metadata; +#' Default: 'integrated_snn_res.0.5'. +#' @param plot If TRUE, plots the UMAP with new cluster names; Default: `TRUE`.. +#' +#' @examples +#' \dontrun{ +#' combined.obj <- AutoNumber.by.UMAP( +#' obj = combined.obj, dim = 1, reduction = "umap", +#' ident = "integrated_snn_res.0.5" +#' ) +#' DimPlot.ClusterNames(combined.obj, ident = GetClusteringRuns(combined.obj)[1]) +#' } +#' +#' @export +#' @importFrom CodeAndRoll2 translate +#' @importFrom Stringendo kpp kppu iprint +#' @importFrom Seurat FetchData +AutoNumber.by.UMAP <- function(obj = combined.obj, + reduction = "umap", + dim = 1, swap = FALSE, + ident = GetClusteringRuns(obj = obj)[1], + plot = TRUE) { + dim_name <- kppu(reduction, dim) + if (obj@version < "5") dim_name <- toupper(dim_name) + message("Obj. version: ", obj@version, " \ndimension name: ", dim_name) + message("Resolution: ", ident) + + stopifnot("Identity not found." = ident %in% colnames(obj@meta.data)) + + coord.umap <- obj@reductions$umap@cell.embeddings[, dim_name] + + # coord.umap <- round(coord.umap,digits = 2) + identX <- as.character(obj@meta.data[[ident]]) + + ls.perCl <- split(coord.umap, f = identX) + MedianClusterCoordinate <- sapply(ls.perCl, median) + # sort(MedianClusterCoordinate) + + OldLabel <- names(sort(MedianClusterCoordinate, decreasing = swap)) + NewLabel <- as.character(0:(length(MedianClusterCoordinate) - 1)) + NewMeta <- CodeAndRoll2::translate(vec = identX, old = OldLabel, new = NewLabel) + NewMetaCol <- kpp(ident, "ordered") + iprint("NewMetaCol:", NewMetaCol) + + obj[[NewMetaCol]] <- NewMeta + if (plot) { + x <- clUMAP(obj, ident = NewMetaCol) + print(x) + } + return(obj) +} + + +# _________________________________________________________________________________________________ +# General ______________________________ ---- +# _________________________________________________________________________________________________ + +# _________________________________________________________________________________________________ +# DGEA and GO-term enrichment ______________________________ ---- +# _________________________________________________________________________________________________ + + +#' @title scEnhancedVolcano +#' +#' @description This function creates an enhanced volcano plot. +#' +#' @param toptable A data frame with the results of differential gene expression analysis. +#' @param lab A vector of gene symbols to label on the plot. +#' @param suffix A string to append to the filename/title of the plot. +#' @param title The title of the plot. +#' @param suffix A string to append to the filename/title of the plot. +#' @param caption The first line of caption of the plot. +#' @param caption2 The second line of caption of the plot. +#' @param x The x-axis, which is typically the average log2 fold change. +#' @param y The y-axis, which is typically the adjusted p-value. +#' @param selectLab A vector of gene symbols to select for labeling. +#' @param min.p The minimum p-value, to trim high values on the Y-axis. +#' @param max.l2fc The maximum log2 fold change, to trim high values on the X-axis. +#' @param min.pct.cells The minimum percentage of cells in which a gene must be expressed to be included in the plot. +#' @param pCutoffCol The column in the toptable that contains the p-value cutoff. +#' @param pCutoff The p-value cutoff. +#' @param FCcutoff The fold change cutoff. +#' @param count_stats Logical. Calculates a data frame with the count statistics. +#' @param drawConnectors Whether to draw connectors between the labels and the points. +#' @param max.overlaps The maximum number of labels that can overlap. +#' @param min.p The minimum p-value, to trim high values on the Y-axis. +#' @param h The height of the plot. +#' @param w The width of the plot. +#' @param ... Pass any other parameter to `EnhancedVolcano::EnhancedVolcano()`. +#' +#' @return A ggplot object. +#' +#' @importFrom EnhancedVolcano EnhancedVolcano +#' +#' @export + +scEnhancedVolcano <- function( + toptable, + x = "avg_log2FC", + y = "p_val_adj", + lab = rownames(toptable), + title = paste("DGEA"), + + selectLab = trail(lab, 10), + min.p = 1e-50, + max.l2fc = Inf, + min.pct.cells = 0.1, + pCutoffCol = "p_val_adj", + pCutoff = 1e-3, + FCcutoff = 1, + + suffix = NULL, + caption = paste("Min. Fold Change in Input:", .estMinimumFC(toptable)), + caption2 = paste("min p_adj:", min.p, "(Y-axis values clipped at)"), + + count_stats = TRUE, + drawConnectors = T, max.overlaps = Inf, + h = 9, w = h, + ...) { + # + message("\nMin. log2fc: ", FCcutoff, "\nMax. p-adj: ", pCutoff, + "\nMin. p-adj (trim high y-axis): ", min.p, + "\nMin. pct cells expressing: ", min.pct.cells) + stopifnot(nrow(toptable) >5) + + + # Filter min. cells expressing. + toptable <- toptable |> dplyr::filter(pct.1 > min.pct.cells | pct.2 > min.pct.cells) + + # calculate true min pct cells expressing (maybe input prefiltered above thr. already). + min.pct.cells <- toptable |> select(pct.1, pct.2) |> as.matrix() |> rowMax() |> min() + + # Clip p-values. + toptable[["p_val_adj"]] <- + clip.at.fixed.value(x = toptable[["p_val_adj"]], thr = min.p, high = F) + + # Clip log2FC. + if (max.l2fc < Inf) { + toptable[["avg_log2FC"]] <- + clip.at.fixed.value(x = toptable[["avg_log2FC"]], thr = -max.l2fc, high = F) + toptable[["avg_log2FC"]] <- + clip.at.fixed.value(x = toptable[["avg_log2FC"]], thr = max.l2fc, high = T) + } + + # Add statistical information to the subtitle. + if (count_stats) { + enr_stats <- unlist(countRelevantEnrichments(df = toptable, logfc_col = x, pval_col = y, + logfc_cutoff = FCcutoff, pval_cutoff = pCutoff)) + stat_info <- kppws("Genes", intermingle2vec(names(enr_stats), enr_stats),"(red)") + subtitle <- paste0(stat_info, "\n", + paste("Cutoffs: max.p_adj: ", pCutoff, " | min.log2FC: ", FCcutoff, + " | min.pct.cells: ", min.pct.cells)) + } + caption <- paste0(caption, "\n", caption2) + + # Create an enhanced volcano plot. + # try.dev.off(); + pobj <- EnhancedVolcano::EnhancedVolcano( + toptable = toptable + , x = x, y = y + , title = title, subtitle = subtitle + , lab = lab, selectLab = selectLab + , caption = caption + , pCutoffCol = pCutoffCol + , pCutoff = pCutoff + , FCcutoff = FCcutoff + , drawConnectors=drawConnectors + , max.overlaps = max.overlaps + , ...) + + print(pobj) + # Save the plot. + qqSave(ggobj = pobj, title = paste0("Volcano.", make.names(title), suffix), + h = h, w = w) + return(pobj) +} + +# ________________________________________________________________________ +#' @title Estimate Minimum Log2-Based Fold Change +#' +#' @description This function estimates the minimum log2-based fold change from a data frame column. +#' +#' @param df A data frame containing the fold change data. Default: `df.m.UL`. +#' @param col A character string specifying the column name containing log2 fold change values. Default: "avg_log2FC". +#' +#' @return The minimum log2-based fold change, rounded and transformed from log2 to linear scale. +#' +#' @examples +#' \dontrun{ +#' df <- data.frame(avg_log2FC = c(-1, -0.5, 0.5, 1)) +#' .estMinimumFC(df, "avg_log2FC") +#' # .estMinimumFC(df = df.m.UL, col = "avg_log2FC") +#' } +#' @return The minimum log2-based fold change, rounded and transformed from log2 to linear scale. + +.estMinimumFC <- function(df, col = "avg_log2FC") { + lfc <- df[[col]] + lfc_enr <- min(lfc[lfc>0]) + lfc_depl <- abs(max(lfc[lfc<0])) + estim_min_l2fc <- min(lfc_enr, lfc_depl) + return(iround(2^estim_min_l2fc)) +} + + +# ________________________________________________________________________ +#' @title Count Relevant Enrichments +#' +#' @description This function counts the number of relevantly expressed genes from a differential +#' gene expression table. It considers genes to be relevant if they fall under a maximum p-value +#' cutoff and are above a minimum log2 fold change cutoff. The function reports the number of +#' enriched and depleted genes. +#' +#' @param df Data frame containing the gene expression data. +#' @param pval_col Character. Name of the column containing the adjusted p-values. Default: "p_val_adj". +#' @param logfc_col Character. Name of the column containing the log2 fold change values. Default: "avg_log2FC". +#' @param pval_cutoff Numeric. The maximum adjusted p-value to consider a gene relevant. Default: 1e-2. +#' @param logfc_cutoff Numeric. The minimum log2 fold change to consider a gene relevant. Default: 1. +#' +#' @return A list with the counts of enriched and depleted genes. +#' @export +#' +#' @examples +#' df <- data.frame( +#' p_val_adj = c(0.001, 0.02, 0.03, 0.0001), +#' avg_log2FC = c(1.5, -2, 0.5, 2) +#' ) +#' countRelevantEnrichments(df) +countRelevantEnrichments <- function(df, + pval_col = "p_val_adj", logfc_col = "avg_log2FC", + pval_cutoff = 1e-2, logfc_cutoff = 1) { + # + stopifnot(is.data.frame(df), + pval_col %in% colnames(df), + logfc_col %in% colnames(df), + is.numeric(pval_cutoff), + is.numeric(logfc_cutoff)) + + relevant_genes <- df |> + dplyr::filter(!!sym(pval_col) <= pval_cutoff) + + enriched_count <- relevant_genes |> + dplyr::filter(!!sym(logfc_col) >= logfc_cutoff) |> + nrow() + + depleted_count <- relevant_genes |> + dplyr::filter(!!sym(logfc_col) <= -logfc_cutoff) |> + nrow() + + return(list(enriched = enriched_count, depleted = depleted_count)) +} + + +# ________________________________________________________________________ +#' @title Perform GO Enrichment Analysis +#' +#' @description This function performs Gene Ontology (GO) enrichment analysis using the +#' `clusterProfiler::enrichGO` function. It takes the gene list, universe, organism database, +#' gene identifier type, and ontology type as inputs and returns the enrichment results. +#' +#' @param genes Character vector. List of genes for enrichment analysis. Default: NULL. +#' @param universe Character vector. Background gene list (universe). Default: NULL. +#' @param org_db Character. Organism-specific database to use (e.g., 'org.Hs.eg.db'). Default: 'org.Hs.eg.db'. +#' @param key_type Character. Gene identifier type (e.g., 'SYMBOL', 'ENTREZID'). Default: 'SYMBOL'. +#' @param ont Character. Ontology type to use (e.g., 'BP', 'MF', 'CC'). Default: 'BP'. +#' @param pAdjustMethod Character. Method for p-value adjustment. Default: 'BH' for Benjamini-Hochberg. +#' @param pvalueCutoff Numeric. P-value cutoff for significance. Default: 0.05. +#' @param qvalueCutoff Numeric. Q-value cutoff for significance. Default: 0.2. +#' @param save Logical. Save the results as a data frame. Default: `TRUE`.. +#' @param suffix Character. Suffix to append to the output file name. Default: 'GO.Enrichments'. +#' @param check.gene.symbols Logical. Check gene symbols for validity. Default: `TRUE`.. +#' @param ... Additional arguments to pass to `clusterProfiler::enrichGO`. +#' @importFrom clusterProfiler enrichGO +#' +#' @return A data frame with GO enrichment results. +#' @export +#' +#' @examples +#' \dontrun{ +#' gene_list <- rownames(df.m.DL.up.in.TSC) +#' background_genes <- names(all.genes) +#' go_results <- performGOEnrichment(gene_list, background_genes, "org.Hs.eg.db", "SYMBOL", "BP") +#' print(go_results) +#' } + +scGOEnrichment <- function(genes, universe = NULL, + org_db = "org.Hs.eg.db", key_type = "SYMBOL", ont = "BP", + pAdjustMethod = "BH", pvalueCutoff = 0.05, qvalueCutoff = 0.2, + save = TRUE, + suffix = NULL, + check.gene.symbols = TRUE, + ...) { + # Load required library + if (!requireNamespace("clusterProfiler", quietly = TRUE)) { + stop("The 'clusterProfiler' package is required but not installed.") + } + + # Input assertions + stopifnot( + is.character(genes) | is.null(genes), + (is.character(universe) | is.null(universe)), + (length(universe) > 100 | is.null(universe)), + is.character(org_db), is.character(key_type), is.character(ont), + is.character(ont) + ) + + if ( is.null(genes) | length(genes) == 0 ) return(NULL) + + # check.gene.symbols + if (check.gene.symbols) { + x <- checkGeneSymbols(genes, species = "human") + genes <- x[x[ , "Approved"] , 1] + } + + message("Performing enrichGO() analysis...") + message(length(genes), " approved genes of interest, in ", length(universe), " background genes.") + + go_results <- clusterProfiler::enrichGO( + gene = genes, + universe = universe, + pAdjustMethod = pAdjustMethod, + OrgDb = org_db, + keyType = key_type, + pvalueCutoff = pvalueCutoff, + qvalueCutoff = qvalueCutoff, + ont = ont, + ...) + + nr_of_enr_terms <- length(go_results@result$"ID") + + # Output assertions + if(nrow(go_results) < 1) warning("No enriched terms found!", immediate. = TRUE) + + if(save) xsave(go_results, suffix = kpp("enr", nr_of_enr_terms, suffix), + showMemObject = F, saveParams = F, allGenes = F) + message("\nNr of enriched terms: ", nr_of_enr_terms) + + return(go_results) +} + + + + +# ________________________________________________________________________ +#' @title Barplot GO Enrichment Results by enrichplot +#' +#' @description This function creates a bar plot of GO enrichment analysis results using the +#' `enrichplot::barplot.enrichResult` function. It also allows saving the plot to a file. +#' +#' @param df.enrichment Data frame. Enrichment results from GO analysis. Default: NULL. +#' @param tag Character. Tag to be added to the title of the plot. Default: "in ...". +#' @param universe Character. Background gene list (universe). Default: NULL. +#' @param title Character. Title of the plot. Default: "GO Enrichment Analysis" followed by `tag`. +#' @param subtitle Character. Subtitle of the plot. Default: NULL. +#' @param caption Character. Caption of the plot. Default: constructed from input parameters. +#' @param save Logical. Whether to save the plot to a file. Default: `TRUE`.. +#' @param h Height of the plot canvas, calculated as the height of an A4 page times `scale`; Default: `8.27 * scale`. +#' @param w Width of the plot canvas, calculated as the width of an A4 page times `scale`; Default: `11.69 * scale`. +#' @param also.pdf Save plot in both png and pdf formats. +#' @param ... Additional arguments passed to `enrichplot::barplot.enrichResult`. +#' +#' @import enrichplot +#' @importFrom ggplot2 labs +#' +#' @return None. The function prints the plot and optionally saves it. +#' @export +#' +#' @examples +#' \dontrun{ +#' df.enrichment <- data.frame() # Example enrichment results data frame +#' plotGOEnrichment(df.enrichment) +#' } + +scBarplotEnrichr <- function(df.enrichment, + tag = "...", + universe = NULL, + title = paste("GO Enriched Terms", tag), + subtitle = kppws("Input: ", substitute(df.enrichment)), + caption = paste0("Input genes: ", length(df.enrichment@'gene'), + " | Enriched terms: ", nrow(df.enrichment), + " | background genes: ", length(universe) ), + save = TRUE, + w = 10, h = 10, + also.pdf = F, + ...) { + + if(tag == "...") warning("Please provide a tag describing where are the enrichments.", immediate. = TRUE) + nr_input_genes <- length(df.enrichment@'gene') + + pobj <- + if(nrow(df.enrichment) < 1 | is.null (df.enrichment)) { + warning("No enriched terms input!", immediate. = TRUE) + ggplot() + theme_void() + annotate("text", x = 1, y = 1, label = "NO ENRICHMENT", + size = 8, color = "red", hjust = 0.5, vjust = 0.5) + } else if (nr_input_genes < 5) { + warning("Very few inputs for GOENR", immediate. = TRUE) + ggplot() + theme_void() + annotate("text", x = 1, y = 1, label = "TOO FEW GENES (<5)", + size = 8, color = "red", hjust = 0.5, vjust = 0.5) + + } else { + enrichplot:::barplot.enrichResult(df.enrichment, showCategory = 20) + } + pobj <- pobj + ggplot2::labs(title = title, subtitle = subtitle, caption = caption) + + if (save) { + qqSave(pobj, title = title, w = w, h = h, also.pdf = also.pdf) + } + + return(pobj) +} + + +# ________________________________________________________________________ +#' @title Filter GO Enrichment Results +#' +#' @description This function filters GO enrichment results based on adjusted p-value and q-value +#' cutoffs, and retrieves the descriptions of the filtered results. +#' +#' @param df.enrichments An object of class `enrichResult` containing the GO enrichment results. +#' @param colname Character. The name of the column containing the GO-term names, or else. +#' @param pvalueCutoff Numeric. The p-value cutoff for filtering the results. Default: NULL, meaning +#' that the default cutoff of the input object is used. It is stored in `df.enrichments@pvalueCutoff`. +#' @param qvalueCutoff Numeric. The q-value cutoff for filtering the results. Default: NULL, +#' meaning that the default cutoff of the input object is used. It is stored in `df.enrichments@qvalueCutoff`. +#' +#' @return A character vector of descriptions of the filtered GO enrichment results. +#' +#' @examples +#' # Assuming GO.Enriched.DL.Ctrl is an object of class `enrichResult` created by clusterprofiler or equivalent +#' descriptions <- filterGoEnrichment(GO.Enriched.DL.Ctrl) +#' print(descriptions) +#' +#' @importFrom dplyr filter pull +#' +#' @export +filterGoEnrichment <- function(df.enrichments, + pvalueCutoff = NULL, + qvalueCutoff = NULL, + colname = 'Description') { + # Input assertions + stopifnot( + "enrichResult" %in% class(df.enrichments), + !is.null(df.enrichments@result), + !is.null(df.enrichments@pvalueCutoff), + !is.null(df.enrichments@qvalueCutoff) + ) + + pvalueCutoff <- if(is.null(pvalueCutoff)) df.enrichments@pvalueCutoff else pvalueCutoff + qvalueCutoff <- if(is.null(qvalueCutoff)) df.enrichments@qvalueCutoff else qvalueCutoff + + message(paste("Filtering GO enrichment results with \np-value cutoff", + pvalueCutoff, "and q-value cutoff", qvalueCutoff)) + + # Filter and retrieve GO + descriptions <- df.enrichments@result |> + dplyr::filter(p.adjust < pvalueCutoff) |> + dplyr::filter(qvalue < qvalueCutoff) |> + dplyr::pull(!!sym(colname)) + + # Output assertions + stopifnot(is.character(descriptions)) + message("\nNr of enriched terms: ", length(descriptions)) + + return(descriptions) +} + +# Example usage +# Assuming GO.Enriched.DL.Ctrl is an object of class `enrichResult` +# descriptions <- filterGoEnrichment(GO.Enriched.DL.Ctrl) +# print(descriptions) + + +# ________________________________________________________________________ +#' @title Count Enriched and Depleted Genes +#' +#' @description This function counts the number of significantly enriched and depleted genes +#' based on the provided criteria. It filters the genes based on adjusted p-value and +#' logarithm of fold change. +#' +#' @param df A dataframe containing the result of the differential gene expression analysis. +#' @param min_padj A numeric value specifying the minimum adjusted p-value. Default: 0.01. +#' @param min_logFC A numeric value specifying the minimum logarithm to fold change. +#' Default: 0.5. This value should be positive and will be used as a negative value for +#' depleted genes. +#' @param colname.p A string specifying the column name for the adjusted p-value in the dataframe. +#' Default: 'p_val_adj'. +#' @param colname.lFC A string specifying the column name for the logarithm to fold change in +#' the dataframe. Default: 'avg_log2FC'. +#' +#' @return A list of two elements: +#' \item{GeneCounts}{A named numeric vector containing the numbers of enriched and depleted genes.} +#' \item{Parameters}{A named numeric vector containing the parameter names and their values.} +#' +#' @examples +#' df <- data.frame( +#' p_val = c(5.580902e-14, 4.607790e-12, 1.643436e-11), +#' avg_log2FC = c(0.4985875, 0.4983416, 0.4977825), +#' pct.1 = c(0.429, 0.575, 0.387), +#' pct.2 = c(0.251, 0.396, 0.232), +#' p_val_adj = c(1.091513e-09, 9.011916e-08, 3.214233e-07) +#' ) +#' result <- countEnrichedDepletedGenes(df) +#' print(result) +#' +#' @export + +countEnrichedDepletedGenes <- function(df, min_padj = 0.01, min_logFC = 0.5, + # genes = rownames(df), + colname.p = 'p_val_adj', colname.lFC = 'avg_log2FC') { + + stopifnot(min_padj > 0, + min_logFC > 0, + colname.p %in% colnames(df), + colname.lFC %in% colnames(df) + ) + + # Filter the dataframe for enriched genes + idx.enr <- df[[colname.p]] <= min_padj & df[[colname.lFC]] >= min_logFC + enriched_genes <- df[ idx.enr, ] + enriched_symbols <- rownames(enriched_genes) + # enriched_symbols <- genes[idx.enr] + + # Filter the dataframe for depleted genes + idx.depl <- (df[[colname.p]] <= min_padj & df[[colname.lFC]] <= -min_logFC) + + depleted_genes <- df[idx.depl, ] + depleted_symbols <- rownames(depleted_genes) + # depleted_symbols <- genes[idx.depl] + + # Create the named numeric vectors + gene_counts <- c('Enriched' = nrow(enriched_genes), 'Depleted' = nrow(depleted_genes)) + print("gene_counts"); print(gene_counts) + parameters <- c('min_padj' = min_padj, 'min_logFC' = min_logFC) + print("parameters"); print(parameters) + + # Create the list of gene symbols + gene_symbols <- list('Enriched' = enriched_symbols, 'Depleted' = depleted_symbols) + + # Return the results as a list + result <- list(gene_counts, parameters, gene_symbols) + names(result) <- c("GeneCounts", "Parameters", "GeneSymbols") + + return(result) +} + +# ________________________________________________________________________ + + + +# _________________________________________________________________________________________________ +# Helpers ______________________________ ---- +# _________________________________________________________________________________________________ + +#' @title Adjust Layout Parameters for multi* plotting fucntions +#' +#' @description Adjusts layout dimensions and properties based on the specified layout type. +#' Updates the provided environment with new dimensions and layout configuration. +#' +#' @param layout A string specifying the layout type. Can be either "tall" or "wide". Default: NULL. +#' @param scaling A numeric scaling factor to adjust the dimensions. Default: 1. +#' @param wA4 The width of the A4 paper in inches. Default: 8.27. +#' @param hA4 The height of the A4 paper in inches. Default: 11.69. +#' @param env The environment where the layout dimensions and properties should be assigned. +#' Default: parent.frame(). +#' +#' @return Invisible NULL. The function operates by side effects, updating the `env` environment. +#' @examples +#' env <- new.env() +#' .adjustLayout("tall", 1, 8.27, 11.69, env) +#' print(env$w) # Should print the width based on "tall" layout scaling. +#' +.adjustLayout <- function(layout, scaling, wA4, hA4, env) { + # Input checks + stopifnot( + is.character(layout), is.numeric(scaling), is.numeric(wA4), + is.numeric(hA4), is.environment(env), + layout %in% c("tall", "wide") + ) + + if (layout == "tall") { + assign("w", wA4 * scaling, envir = env) + assign("h", hA4 * scaling, envir = env) + assign("nr.Col", 2, envir = env) + assign("nr.Row", 4, envir = env) + message("tall layout active, nr.Col ignored.") + } else if (layout == "wide") { + assign("w", hA4 * scaling, envir = env) + assign("h", wA4 * scaling, envir = env) + assign("nr.Col", 2, envir = env) # Adjusted for consistency with wide layout explanation + assign("nr.Row", 2, envir = env) + message("wide layout active, nr.Col ignored.") + } else { + message("No specific layout selected, defaulting to input parameters.") + } +} + + +# _________________________________________________________________________________________________ +# Saving plots ______________________________ ---- +# _________________________________________________________________________________________________ + + + +# _________________________________________________________________________________________________ +#' @title Save Two Plots on One A4 Page +#' +#' @description Arranges and saves two UMAP plots (or any plots) side-by-side or one above +#' the other on a single A4 page. +#' +#' @param plot_list A list containing ggplot objects to be arranged and saved. +#' @param pname Boolean indicating if the plot name should be automatically generated; +#' if FALSE, the name is based on `plot_list` and `suffix`; Default: `FALSE`.. +#' @param suffix Suffix to be added to the generated filename if `pname` is FALSE; Default: NULL. +#' @param scale Scaling factor for adjusting the plot size; Default: 1. +#' @param nrow Number of rows in the plot arrangement; Default: 2. +#' @param ncol Number of columns in the plot arrangement; Default: 1. +#' @param h Height of the plot, calculated as A4 height times `scale`; +#' calculated dynamically based on `scale`. +#' @param w Width of the plot, calculated as A4 width times `scale`; +#' calculated dynamically based on `scale`. +#' @param ... Additional parameters passed to `plot_grid`. +#' +#' @examples +#' \dontrun{ +#' p1 <- ggplot(iris, aes(Sepal.Length, Sepal.Width, color = Species)) + +#' geom_point() +#' p2 <- ggplot(iris, aes(Petal.Length, Petal.Width, color = Species)) + +#' geom_point() +#' save2plots.A4(plot_list = list(p1, p2)) +#' } +#' +#' @export +#' @importFrom cowplot plot_grid save_plot ggdraw +#' @importFrom ggplot2 theme +save2plots.A4 <- function( + plot_list, pname = FALSE, suffix = NULL, scale = 1, + nrow = 2, ncol = 1, + h = 11.69 * scale, w = 8.27 * scale, ...) { + if (pname == FALSE) pname <- sppp(substitute(plot_list), suffix) + p1 <- cowplot::plot_grid( + plotlist = plot_list, nrow = nrow, ncol = ncol, + labels = LETTERS[1:length(plot_list)], ... + ) + p1 <- cowplot::ggdraw(p1) + + theme(plot.background = element_rect(fill = "white", color = NA)) + + iprint("Saved as:", pname) + + save_plot(plot = p1, filename = extPNG(pname), base_height = h, base_width = w) +} + +# _________________________________________________________________________________________________ +#' @title Save Four Plots on One A4 Page +#' +#' @description Arranges and saves four plots (e.g. UMAPs) onto a single A4 page, allowing for a +#' compact comparison of different visualizations or clustering results. +#' +#' @param plot_list A list containing ggplot objects to be arranged and saved; each object represents one panel. +#' @param pname Plot name; if FALSE, a name is generated automatically based on `plot_list` and `suffix`; Default: `FALSE`.. +#' @param suffix Suffix to be added to the filename; Default: NULL. +#' @param scale Scaling factor for adjusting the size of the overall plot canvas; Default: 1. +#' @param nrow Number of rows to arrange the plots in; Default: 2. +#' @param ncol Number of columns to arrange the plots in; Default: 2. +#' @param h Height of the plot canvas, calculated as the height of an A4 page times `scale`; Default: `8.27 * scale`. +#' @param w Width of the plot canvas, calculated as the width of an A4 page times `scale`; Default: `11.69 * scale`. +#' @param ... Additional parameters passed to `plot_grid`. +#' +#' @examples +#' \dontrun{ +#' p1 <- ggplot(iris, aes(Sepal.Length, Sepal.Width, color = Species)) + +#' geom_point() +#' p2 <- ggplot(mtcars, aes(mpg, disp, color = as.factor(cyl))) + +#' geom_point() +#' p3 <- ggplot(mpg, aes(displ, hwy, color = class)) + +#' geom_point() +#' p4 <- ggplot(diamonds, aes(carat, price, color = cut)) + +#' geom_point() +#' save4plots.A4(plot_list = list(p1, p2, p3, p4)) +#' } +#' +#' @export +#' @importFrom cowplot plot_grid save_plot ggdraw +#' @importFrom ggplot2 theme +save4plots.A4 <- function( + plot_list, pname = FALSE, suffix = NULL, scale = 1, + nrow = 2, ncol = 2, + h = 8.27 * scale, w = 11.69 * scale, + ...) { + if (pname == FALSE) pname <- sppp(substitute(plot_list), suffix) + p1 <- cowplot::plot_grid( + plotlist = plot_list, nrow = nrow, ncol = ncol, + labels = LETTERS[1:length(plot_list)], ... + ) + # https://stackoverflow.com/questions/13691415/change-the-background-color-of-grid-arrange-output + p1 <- cowplot::ggdraw(p1) + + theme(plot.background = element_rect(fill = "white", color = NA)) + + iprint("Saved as:", pname) + # fname <- MarkdownHelpers::ww.FnP_parser(extPNG(pname) ) + save_plot(plot = p1, filename = extPNG(pname), base_height = h, base_width = w) +} + + + +# _________________________________________________________________________________________________ +#' @title qqSaveGridA4 +#' +#' @description Saves a grid of 2 or 4 ggplot objects onto an A4 page. +#' @param plotlist A list of ggplot objects. Default: pl. +#' @param plots A numeric vector indicating the indices of the plots to save from the 'plotlist'. Default: 1:2. +#' @param NrPlots Number of plots to save. Default: length(plots). +#' @param height Height for the saved image. Default: 11.69. +#' @param width Width for the saved image. Default: 8.27. +#' @param fname File name for the saved image. Default: "Fractions.Organoid-to-organoid variation.png". +#' @param ... Additional arguments passed to the plot_grid function. +#' @return This function does not return a value. It saves a grid plot of ggplot objects to the specified file. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' qqSaveGridA4(plotlist = pl, plots = 1:2, fname = "Fractions.per.Cl.png") +#' qqSaveGridA4(plotlist = pl, plots = 1:4, fname = "Fractions.per.Cl.4.png") +#' } +#' } +#' @seealso +#' \code{\link[cowplot]{plot_grid}} +#' @importFrom cowplot plot_grid +#' +#' @export +qqSaveGridA4 <- function( + plotlist = pl, + plots = 1:2, NrPlots = length(plots), height = 11.69, width = 8.27, + fname = "Fractions.Organoid-to-organoid variation.png", ...) { + stopifnot(NrPlots %in% c(2, 4)) + iprint(NrPlots, "plots found,", plots, "are saved.") + pg.cf <- cowplot::plot_grid(plotlist = plotlist[plots], nrow = 2, ncol = NrPlots / 2, labels = LETTERS[1:NrPlots], ...) + if (NrPlots == 4) list2env(list(height = width, width = height), envir = as.environment(environment())) + save_plot( + filename = fname, + plot = pg.cf, base_height = height, base_width = width + ) + MarkdownHelpers::ww.FnP_parser(fname) +} + + + +# _________________________________________________________________________________________________ +# plotting.dim.reduction.3D.R ______________________________ ---- +# _________________________________________________________________________________________________ +# source('~/GitHub/Packages/Seurat.utils/Functions/plotting.dim.reduction.3D.R') +# try (source("https://raw.githubusercontent.com/vertesy/Seurat.utils/master/Functions/Plotting.dim.reduction.3D.R")) +# Source: self + https://github.com/Dragonmasterx87/Interactive-3D-Plotting-in-Seurat-3.0.0 + +# Requirements __________________________________________ +# try(library(plotly), silent = TRUE) +# try(library(MarkdownReports), silent = TRUE) +# try(library(htmlwidgets), silent = TRUE) + +# May also require +# try (source('~/GitHub/Packages/CodeAndRoll/CodeAndRoll.R'),silent= TRUE) # generic utilities funtions +# require('MarkdownReports') # require("devtools") + + + +# _________________________________________________________________________________________________ +#' @title Check Quantile Cutoff and Clip Outliers +#' +#' @description Checks a specified quantile cutoff and clips outliers from an expression vector, +#' ensuring that a minimum number of cells expressing a gene remain. +#' +#' @param expr.vec A numeric vector representing gene expression data. +#' @param quantileCutoffX The quantile cutoff for clipping outliers. +#' @param min.cells.expressing The minimum number of cells that should remain expressing after clipping. +#' @return The expression vector with outliers clipped, ensuring the minimum number of cells expressing. +#' @examples +#' \dontrun{ +#' expr.vec <- c(...) +#' quantileCutoff <- 0.99 +#' min.cells.expressing <- 10 +#' ww.check.quantile.cutoff.and.clip.outliers(expr.vec, quantileCutoff, min.cells.expressing) +#' } +#' @export +#' @importFrom CodeAndRoll2 clip.outliers.at.percentile +#' +ww.check.quantile.cutoff.and.clip.outliers <- function(expr.vec = plotting.data[, gene], + quantileCutoffX = quantileCutoff, + min.cells.expressing = 10) { + expr.vec.clipped <- + CodeAndRoll2::clip.outliers.at.percentile(expr.vec, probs = c(1 - quantileCutoffX, quantileCutoffX)) + if (sum(expr.vec.clipped > 0) > min.cells.expressing) { + expr.vec <- expr.vec.clipped + } else { + iprint("WARNING: quantile.cutoff too stringent, would leave <", min.cells.expressing, "cells. It is NOT applied.") + } + return(expr.vec) +} + + +# _________________________________________________________________________________________________ +#' @title plot3D.umap.gene +#' +#' @description Plot a 3D umap with gene expression. Uses plotly. Based on github.com/Dragonmasterx87. +#' @param gene The gene of interest. Default: 'TOP2A' +#' @param obj The Seurat object for which the 3D umap plot will be generated. Default: combined.obj +#' @param quantileCutoff Cutoff value for the quantile for clipping outliers in the gene expression data. Default: 0.99 +#' @param def.assay The default assay to be used. Choose between "integrated" and "RNA". Default: "RNA" +#' @param suffix A suffix added to the filename. Default: NULL +#' @param annotate.by The cluster or grouping to be used for automatic annotation. Default: First returned result from GetNamedClusteringRuns(obj) function. +#' @param alpha Opacity of the points in the plot. Default: 0.5 +#' @param dotsize The size of the dots in the plot. Default: 1.25 +#' @param ... Pass any other parameter to the internally called `plotly::plot_ly`. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' plot3D.umap.gene(obj = combined.obj, gene = "DDIT4", quantileCutoff = .95) +#' plot3D.umap.gene(obj = combined.obj, gene = "percent.mito", quantileCutoff = .95) # for continous meta variables +#' plot3D.umap.gene(obj = combined.obj, gene = "nFeature_RNA", quantileCutoff = .95) # for continous meta variables +#' } +#' } +#' @importFrom plotly plot_ly layout +#' @importFrom Seurat FetchData +#' +#' @export + +plot3D.umap.gene <- function( + gene = "TOP2A", + obj = combined.obj, + annotate.by = GetNamedClusteringRuns(obj = obj, v = F)[1], + quantileCutoff = .99, + def.assay = c("integrated", "RNA")[2], + suffix = NULL, + alpha = .5, + dotsize = 1.25, + col.names = c("umap_1", "umap_2", "umap_3"), + assay = 'RNA', + ...) { + # Input assertions ____________________________________ + + stopifnot( + is(obj, "Seurat"), + is.character(gene), + "gene or feature not found in obj" = (gene %in% Features(obj, assay = assay) | gene %in% colnames(obj@meta.data)), + "annotate.by not found in @meta" = (annotate.by %in% colnames(obj@meta.data) | annotate.by == FALSE), + "reductions.backup is missing from @misc" = is.list(obj@misc$"reductions.backup"), + "umap3d is missing from @misc$reductions.backup" = is(obj@misc$reductions.backup$"umap3d", class2 = "DimReduc"), + "reductionn has 3 columns" = (ncol(obj@misc$reductions.backup$"umap3d") == 3), + "3D reduction has >/< cells than object" = (ncol(obj) == nrow(obj@misc$reductions.backup$"umap3d"@cell.embeddings)) + ) + + if (obj@version < "5") col.names <- toupper(col.names) + message("Obj. version: ", obj@version, " \ndim names: ", kppc(col.names)) + + DefaultAssay(object = obj) <- def.assay + iprint(DefaultAssay(object = obj), "assay") + + # Get and format 3D plotting data ____________________________________ + plotting.data <- obj@misc$reductions.backup$"umap3d"@cell.embeddings + colnames(plotting.data) <- toupper(col.names) + + + Expression <- Seurat::FetchData(object = obj, vars = gene) + plotting.data <- cbind(plotting.data, Expression) + + plotting.data$"Expression" <- ww.check.quantile.cutoff.and.clip.outliers( + expr.vec = plotting.data[, gene], + quantileCutoffX = quantileCutoff, min.cells.expressing = 10 + ) + + # CodeAndRoll2::clip.outliers.at.percentile(plotting.data[, gene], probs = c(1 - quantileCutoff, quantileCutoff)) + plotting.data$"label" <- paste(rownames(plotting.data), " - ", plotting.data[, gene], sep = "") + + ls.ann.auto <- if (annotate.by != FALSE) { + .Annotate4Plotly3D(obj = obj, plotting.data. = plotting.data, annotation.category = annotate.by) + } else { + NULL + } + + plt <- plotly::plot_ly( + data = plotting.data, + x = ~UMAP_1, y = ~UMAP_2, z = ~UMAP_3, + type = "scatter3d", + mode = "markers", + marker = list(size = dotsize), + text = ~label, + color = ~Expression, + opacity = alpha + # , colors = c('darkgrey', 'red') + , colorscale = "Viridis" + # , hoverinfo="text" + , ... + ) |> + plotly::layout(title = gene, scene = list(annotations = ls.ann.auto)) + + SavePlotlyAsHtml(plt, category. = gene, suffix. = suffix) + return(plt) +} + + + + +# _________________________________________________________________________________________________ +#' @title plot3D.umap +#' +#' @description Plot a 3D umap based on one of the metadata columns. Uses plotly. Based on github.com/Dragonmasterx87. +#' @param category The metadata column based on which the 3D UMAP will be plotted. +#' Default: First returned result from GetNamedClusteringRuns(obj) function. +#' @param obj The Seurat object for which the 3D umap plot will be generated. Default: combined.obj +#' @param suffix A suffix added to the filename. Default: NULL +#' @param annotate.by The cluster or grouping to be used for automatic annotation. +#' Default: First returned result from GetNamedClusteringRuns(obj) function. +#' @param dotsize The size of the dots in the plot. Default: 1.25 +#' @param ... Pass any other parameter to the internally called `plotly::plot_ly`. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' plot3D.umap(category = "integrated_snn_res.0.1", obj = combined.obj) +#' } +#' } +#' @importFrom plotly plot_ly layout +#' @importFrom Seurat FetchData +#' +#' @export + +plot3D.umap <- function( + obj = combined.obj, + category = GetNamedClusteringRuns(obj = obj, v = F)[1], + annotate.by = category, + suffix = NULL, + dotsize = 1.25, + col.names = c("umap_1", "umap_2", "umap_3"), + ...) { + + message("category: ", category) + message("annotate.by: ", annotate.by) + + + + # Input assertions ____________________________________ + stopifnot( + is(obj, "Seurat"), + category %in% colnames(obj@meta.data), + annotate.by %in% colnames(obj@meta.data), + "reductions.backup is missing from @misc" = is.list(obj@misc$"reductions.backup"), + "umap3d is missing from @misc$reductions.backup" = is(obj@misc$reductions.backup$"umap3d", class2 = "DimReduc"), + "reductionn has 3 columns" = (ncol(obj@misc$reductions.backup$"umap3d") == 3), + "3D reduction has >/< cells than object" = (ncol(obj) == nrow(obj@misc$reductions.backup$"umap3d"@cell.embeddings)) + ) + + if (obj@version < "5") col.names <- toupper(col.names) + message("Obj. version: ", obj@version, " \ndim names: ", kppc(col.names)) + + # Get and format 3D plotting data ____________________________________ + plotting.data <- obj@misc$reductions.backup$"umap3d"@cell.embeddings # plotting.data <- Seurat::FetchData(object = obj, vars = c(col.names, category)) + colnames(plotting.data) <- toupper(col.names) + + plotting.data <- cbind(plotting.data, obj[[category]]) + colnames(plotting.data)[4] <- "category" + plotting.data$label <- paste(rownames(plotting.data)) # Make a column of row name identities (these will be your cell/barcode names) + + ls.ann.auto <- if (annotate.by != FALSE) { + .Annotate4Plotly3D(obj = obj, plotting.data. = plotting.data, annotation.category = annotate.by) + } else { + NULL + } + + plt <- plotly::plot_ly( + data = plotting.data, + x = ~UMAP_1, y = ~UMAP_2, z = ~UMAP_3, + type = "scatter3d", + mode = "markers", + marker = list(size = dotsize), + text = ~label, + color = ~category, + colors = gg_color_hue(length(unique(plotting.data$"category"))) + # , hoverinfo="text" + , ... + ) |> + plotly::layout(title = category, scene = list(annotations = ls.ann.auto)) + + SavePlotlyAsHtml(plt, category. = category, suffix. = suffix) + return(plt) +} + + +# _________________________________________________________________________________________________ +#' @title SavePlotlyAsHtml +#' +#' @description Save a Plotly 3D scatterplot as an HTML file. +#' @param plotly_obj The Plotly object to save. +#' @param category The category of the plot. +#' @param suffix A suffix to add to the filename. +#' @param OutputDir The output directory. +#' @seealso +#' \code{\link[htmlwidgets]{saveWidget}} +#' @examples \dontrun{ +#' plt <- plotly::plot_ly("some stuff") +#' SavePlotlyAsHtml(plt, category. = "label.categ", suffix. = "test") +#' } +#' +#' @export +#' @importFrom htmlwidgets saveWidget +SavePlotlyAsHtml <- function(plotly_obj, category. = category, suffix. = NULL) { # Save Plotly 3D scatterplot as an html file. + OutputDir <- if (exists("OutDir")) OutDir else getwd() + name.trunk <- kpp("umap.3D", category., suffix., idate(), "html") + fname <- kpps(OutputDir, name.trunk) + iprint("Plot saved as:", fname) + htmlwidgets::saveWidget(plotly_obj, file = fname, selfcontained = TRUE, title = category.) +} + + +# _________________________________________________________________________________________________ +#' @title Backup Dimensionality Reduction Data +#' +#' @description This function is mostly used internally.It stores a backup of specified +#' dimensionality reduction data (e.g., UMAP, tSNE, PCA) +#' within the Seurat object, from `obj@reductions$umap` to the `@misc$reductions.backup` slot. This +#' allows to store 2D and 3D UMAP visualizations in parallel and easily switch between them via +#' the `RecallReduction` function. +#' +#' @param obj Seurat object containing dimensionality reduction data; Default: `combined.obj`. +#' @param dim Number of dimensions to include in the backup; Default: 2. +#' @param reduction Type of dimensionality reduction to backup ('umap', 'tsne', 'pca'); Default: 'umap'. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' obj <- BackupReduction(obj = obj, dim = 2, reduction = "umap") +#' } +#' } +#' +#' @export +BackupReduction <- function(obj = combined.obj, dim = 2, reduction = "umap") { # Backup UMAP to `obj@misc$reductions.backup` from `obj@reductions$umap`. + if (is.null(obj@misc$"reductions.backup")) obj@misc$"reductions.backup" <- list() + dslot <- paste0(reduction, dim, "d") + obj@misc$reductions.backup[[dslot]] <- obj@reductions[[reduction]] + return(obj) +} + + + +# _________________________________________________________________________________________________ +#' @title SetupReductionsNtoKdimensions +#' +#' @description Function to calculate N-to-K dimensional umaps (default = 2:3); and back them up to +#' slots `obj@misc$reductions.backup` from @reductions$umap +#' @param obj A Seurat object. Default: combined.obj +#' @param nPCs A numeric value representing the number of principal components to use. Default: p$n.PC +#' @param dimensions A numeric vector specifying the dimensions to use for the dimensionality reductions. Default: 3:2 +#' @param reduction_input The type of dimensionality reduction to use as input. Can be "pca", or +#' some correctionn results, like harmony pca. +#' @param reduction_output Te type of dimensionality reduction to perform. Can be "umap", "tsne", +#' or "pca". Default: 'umap' +#' @return The input Seurat object with computed dimensionality reductions and backups of these reductions. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' combined.obj <- SetupReductionsNtoKdimensions(obj = combined.obj, nPCs = 10, dimensions = 2:3, reduction = "umap") +#' } +#' } +#' @importFrom tictoc tic toc +#' +#' @export +SetupReductionsNtoKdimensions <- function(obj = combined.obj, nPCs = p$"n.PC", dimensions = 3:2, + reduction_input = "pca", reduction_output = "umap", ...) { + tictoc::tic() + for (d in dimensions) { + iprint(d, "dimensional", reduction_output, "is calculated") + obj <- if (reduction_output == "umap") { + RunUMAP(obj, dims = 1:nPCs, reduction = reduction_input, n.components = d, ...) + } else if (reduction_output == "tsne") { + RunTSNE(obj, dims = 1:nPCs, reduction = reduction_input, n.components = d, ...) + } else if (reduction_output == "pca") { + RunPCA(obj, dims = 1:nPCs, n.components = d, ...) + } + obj <- BackupReduction(obj = obj, dim = d, reduction = reduction_output) + } + + tictoc::toc() + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title Recall Dimensionality Reduction from backup slot +#' +#' @description Restores dimensionality reduction data (e.g., UMAP, tSNE, PCA) from a backup +#' stored within `obj@misc$reductions.backup` to the active `obj@reductions` slot. +#' +#' @param obj Seurat object from which the backup will be restored; Default: `combined.obj`. +#' @param dim Number of dimensions of the reduction data to restore; Default: 2. +#' @param reduction Type of dimensionality reduction to be restored ('umap', 'tsne', 'pca'); Default: 'umap'. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' combined.obj <- RecallReduction(obj = combined.obj, dim = 2, reduction = "umap") +#' qUMAP() +#' combined.obj <- RecallReduction(obj = combined.obj, dim = 3, reduction = "umap") +#' qUMAP() +#' } +#' } +#' +#' @export +RecallReduction <- function(obj = combined.obj, dim = 2, reduction = "umap") { + dslot <- paste0(reduction, dim, "d") + reduction.backup <- obj@misc$reductions.backup[[dslot]] + msg <- paste(dim, "dimensional", reduction, "from obj@misc$reductions.backup") + stopif(is.null(reduction.backup), message = paste0(msg, " is NOT FOUND")) + iprint(msg, "is set active. ") + stopifnot(dim == ncol(reduction.backup)) + obj@reductions[[reduction]] <- reduction.backup + return(obj) +} + + + +# _________________________________________________________________________________________________ +#' @title .Annotate4Plotly3D +#' +#' @description Internal helper function. Create annotation labels for 3D plots. +#' Source https://plot.ly/r/text-and-annotations/#3d-annotations. +#' @param obj The Seurat object for which the 3D plot annotations will be generated. Default: combined.obj +#' @param plotting.data. The data frame containing plotting data. +#' @param annotation.category The category for which the annotation is generated. +#' @export +#' @importFrom dplyr group_by summarise +#' @importFrom Seurat FetchData + +.Annotate4Plotly3D <- function( + obj = combined.obj, + plotting.data., + annotation.category) { + stopifnot( + "annotation.category is missing" = !is.null(annotation.category), + "plotting.data. is missing" = !is.null(plotting.data.), + "annotation.category is not in meta.data" = annotation.category %in% colnames(obj@meta.data) + ) + + plotting.data.$"annot" <- Seurat::FetchData(object = obj, vars = c(annotation.category))[, 1] + auto_annot <- + plotting.data. |> + group_by(annot) |> + summarise( + showarrow = FALSE, + xanchor = "left", + xshift = 10, + opacity = 0.7, + "x" = mean(UMAP_1), + "y" = mean(UMAP_2), + "z" = mean(UMAP_3) + ) + names(auto_annot)[1] <- "text" + ls.ann.auto <- apply(auto_annot, 1, as.list) + return(ls.ann.auto) +} + +# _________________________________________________________________________________________________ +#' @title Plot3D.ListOfGenes +#' +#' @description Plot and save list of 3D UMAP or tSNE plots using plotly. +#' @param obj Seurat object to be used for the plot. Default: combined.obj +#' @param annotate.by Variable to annotate the clusters by. Default: 'integrated_snn_res.0.7' +#' @param opacity Opacity for the plot points. Default: 0.5 +#' @param cex Point size for the plot. Default: 1.25 +#' @param default.assay Default assay to be used from the Seurat object. Default: second entry from c("integrated", "RNA") +#' @param ListOfGenes List of genes to be plotted. Default: c("BCL11B", "FEZF2", "EOMES", "DLX6-AS1", "HOPX", "DDIT4") +#' @param SubFolderName Name of the subfolder where the plots will be saved. Default: a subfolder named 'plot3D' concatenated with the list of genes. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' CellTypeMarkers <- c("PGK1", "CTIP2" = "BCL11B", "FEZF2", "EOMES", "DLX6-AS1", "HOPX", "DDIT4", "TOP2A", "PTGDS", "EDNRB", "EGFR", "SCGN", "NR2F2", "EMX2", "GAD2", "DLX2", "SATB2") +#' Plot3D.ListOfGenes(obj = combined.obj, ListOfGenes = CellTypeMarkers) +#' } +#' } +#' @export +Plot3D.ListOfGenes <- function( + obj = combined.obj # Plot and save list of 3D UMAP ot tSNE plots using plotly. + , annotate.by = "integrated_snn_res.0.7", opacity = 0.5, cex = 1.25, default.assay = c("integrated", "RNA")[2], + ListOfGenes = c("BCL11B", "FEZF2", "EOMES", "DLX6-AS1", "HOPX", "DDIT4"), + SubFolderName = ppp("plot3D", substitute(ListOfGenes))) { + try(create_set_SubDir(SubFolderName)) + obj. <- obj + rm("obj") + stopifnot(annotate.by %in% c(colnames(obj.@meta.data), FALSE)) + + DefaultAssay(object = obj.) <- default.assay + MissingGenes <- setdiff(ListOfGenes, rownames(obj.)) + if (length(MissingGenes)) iprint("These genes are not found, and omitted:", MissingGenes, ". Try to change default assay.") + ListOfGenes <- intersect(ListOfGenes, rownames(obj.)) + + for (i in 1:length(ListOfGenes)) { + g <- ListOfGenes[i] + print(g) + plot3D.umap.gene(obj = obj., gene = g, annotate.by = annotate.by, alpha = opacity, def.assay = default.assay, dotsize = cex) + } + try(oo()) + try(create_set_Original_OutDir(NewOutDir = ParentDir)) +} + + +# _________________________________________________________________________________________________ +#' @title Plot3D.ListOfCategories +#' +#' @description This function plots and saves a list of 3D UMAP or tSNE plots using plotly. +#' @param obj A Seurat object for which the plot is to be created. Default: 'combined.obj'. +#' @param annotate.by Character vector specifying the metadata column to be used for annotating the plot. Default: 'integrated_snn_res.0.7'. +#' @param cex Numeric value specifying the point size on the plot. Default: 1.25. +#' @param default.assay Character vector specifying the assay to be used. Default: 'RNA' (second element in the vector c("integrated", "RNA")). +#' @param ListOfCategories Character vector specifying the categories to be included in the plot. Default categories are "v.project", "experiment", "Phase", "integrated_snn_res.0.7". +#' @param SubFolderName String specifying the name of the subfolder where the plots will be saved. By default, it's created using the function ppp("plot3D", substitute(ListOfCategories)). +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' categ3Dplots <- c("v.project", "experiment", "Phase", "integrated_snn_res.0.7", "Area", "Individual", "Type") +#' Plot3D.ListOfCategories(obj = combined.obj, ListOfCategories = categ3Dplots) +#' } +#' } +#' @export +Plot3D.ListOfCategories <- function( + obj = combined.obj # Plot and save list of 3D UMAP ot tSNE plots using plotly. + , annotate.by = "integrated_snn_res.0.7", cex = 1.25, default.assay = c("integrated", "RNA")[2], + ListOfCategories = c("v.project", "experiment", "Phase", "integrated_snn_res.0.7"), + SubFolderName = ppp("plot3D", substitute(ListOfCategories))) { + try(create_set_SubDir(SubFolderName)) + obj. <- obj + rm("obj") + stopifnot(annotate.by %in% colnames(obj.@meta.data)) + DefaultAssay(object = obj.) <- default.assay + + MissingCateg <- setdiff(ListOfCategories, colnames(obj.@meta.data)) + if (length(MissingCateg)) iprint("These metadata categories are not found, and omitted:", MissingCateg, ". See colnames(obj@meta.data).") + ListOfCategories <- intersect(ListOfCategories, colnames(obj.@meta.data)) + + for (i in 1:length(ListOfCategories)) { + categ <- ListOfCategories[i] + print(categ) + plot3D.umap(obj = obj., category = categ, annotate.by = annotate.by, dotsize = cex) + } + try(oo()) + try(create_set_Original_OutDir(NewOutDir = ParentDir)) +} + + +# _________________________________________________________________________________________________ +# TEMPORARY ______________________________ ---- +# _________________________________________________________________________________________________ + +# _________________________________________________________________________________________________ +#' @title Display Correlation Values in Pairs Plot +#' +#' @description This function displays the correlation coefficient and significance level within +#' a scatterplot generated by the `pairs()` function. The default correlation method is Pearson, +#' but Kendall or Spearman methods can also be selected. +#' +#' @param x Numeric vector or the first half of the data pair. +#' @param y Numeric vector or the second half of the data pair. +#' @param digits Number of significant digits to display in the correlation coefficient. +#' Default: 2. +#' @param prefix A string prefix added before the correlation coefficient. Default: "". +#' @param cex.cor The character expansion factor for the correlation coefficient text. +#' This argument directly influences the text size. Default: 2. +#' @param method The method of correlation coefficient calculation. It can be "pearson" (default), +#' "kendall", or "spearman". +#' +#' @return This function does not return a value but modifies the current plot by adding the +#' correlation coefficient and its significance level. +#' +#' @examples +#' \dontrun{ +#' pairs(mtcars[, 1:4], panel = panelCorPearson) +#' } +#' @importFrom graphics text par +#' @importFrom stats cor cor.test +#' @export + +panelCorPearson <- function(x, y, digits = 2, prefix = "", cex.cor = 2, method = "pearson") { + # Input validation + stopifnot( + is.numeric(x), is.numeric(y), + is.numeric(digits) && digits > 0, + is.character(prefix), + is.numeric(cex.cor) && cex.cor > 0, + method %in% c("pearson", "kendall", "spearman") + ) + + usr <- par("usr") + on.exit(par(usr)) + par(usr = c(0, 1, 0, 1)) + r <- abs(cor(x, y, method = method, use = "complete.obs")) + txt <- format(c(r, 0.123456789), digits = digits)[1] + txt <- paste(prefix, txt, sep = "") + if (missing(cex.cor)) cex <- 0.8 / strwidth(txt) + + test <- cor.test(x, y, method = method) + Signif <- symnum(test$p.value, + corr = FALSE, na = FALSE, + cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), + symbols = c("***", "**", "*", ".", " ") + ) + + cex <- ifelse(missing(cex.cor), 0.8 / strwidth(txt), cex.cor) + text(0.5, 0.5, txt, cex = cex * r) + text(.8, .8, Signif, cex = cex, col = 2) +} + + + +# _________________________________________________________________________________________________ +#' @title suPlotVariableFeatures for Single Seurat Object +#' +#' @description Generates a Variable Feature Plot for a specified Seurat object, labels points with +#' the top 20 variable genes, and saves the plot to a PDF file. +#' +#' @param obj A single Seurat object. +#' @param NrVarGenes A vector containing the top 20 variable genes for the Seurat object. +#' @param repel A logical value indicating whether to repel the labels to avoid overlap. Default: `TRUE`.. +#' @param plotWidth Numeric value specifying the width of the plot when saved. Default: 7. +#' @param plotHeight Numeric value specifying the height of the plot when saved. Default: 5. +#' @param save A logical value indicating whether to save the plot to a PDF file. Default: `TRUE`.. +#' @param suffix A string suffix to append to the plot filename. Default: NULL. +#' @param assay The assay to use for the plot. Default: DefaultAssay(obj). +#' @param ... Additional arguments to pass to the Seurat::VariableFeaturePlot function. +#' +#' @examples +#' \dontrun{ +#' suPlotVariableFeatures(combined.obj) +#' } +#' @export +suPlotVariableFeatures <- function(obj = combined.obj, NrVarGenes = 15, + repel = TRUE, plotWidth = 7, plotHeight = 5, save = TRUE, + # suffix = kpp("nVF", .getNrScaledFeatures(obj)), + assay = DefaultAssay(obj), + suffix = NULL, + ...) { + message(" > Running suPlotVariableFeatures()...") + message(length(Cells(obj)), " cells | assay: ", assay, " | NrVarGenes: ", NrVarGenes) + + stopifnot( + is(obj, "Seurat"), is.function(ppp), is.logical(repel), + is.numeric(plotWidth), is.numeric(plotHeight) + ) + + obj.name <- deparse(substitute(obj)) + + plot1 <- Seurat::VariableFeaturePlot(obj, assay = assay, ...) + + theme(panel.background = element_rect(fill = "white")) + + labs(title = "Variable Genes", + subtitle = kppws(obj.name, suffix), + caption = paste("Assay:", assay, "|", idate())) + + + # Assuming LabelPoints is defined elsewhere and available for use. + TopVarGenes <- VariableFeatures(obj, assay = assay)[1:NrVarGenes] + labeledPlot <- LabelPoints( + plot = plot1, points = TopVarGenes, repel = repel, + xnudge = 0, ynudge = 0, max.overlaps = 15 + ) + + print(labeledPlot) + filename <- ppp("Var.genes", obj.name, suffix, idate(), "png") + + # if (save) ggplot2::ggsave(plot = labeledPlot, filename = filename, width = plotWidth, height = plotHeight) + if (save) { + qqSave( + ggobj = labeledPlot, + # title = plotname, + fname = filename, ext = ext, + w = plotWidth, h = plotHeight, also.pdf = FALSE + ) + } +} + + +# Notes -------------------------------------------------------------------------------------------- + +# plotMetadataCategPie() is in Seurat.Utils.Metadata.R + + diff --git a/R/Seurat.utils.less.used.R b/R/Seurat.utils.less.used.R index 1e4198c..53c9b17 100644 --- a/R/Seurat.utils.less.used.R +++ b/R/Seurat.utils.less.used.R @@ -2,6 +2,7 @@ # Seurat.utils.less.used.R ---- # ____________________________________________________________________ # file.edit("~/GitHub/Packages/Seurat.utils/R/Seurat.utils.less.used.R") +# setwd("~/GitHub/Packages/Seurat.utils") @@ -12,16 +13,16 @@ #' containing the standard output of 10X Cell Ranger. It (1) loads the filtered data matrices, #' (2) converts them to Seurat objects, and (3) saves them as .RDS files. #' @param InputDir A character string specifying the input directory. -#' @param regex A logical value. If TRUE, the folderPattern is treated as a regular expression. Default is FALSE. -#' @param folderPattern A character vector specifying the pattern of folder names to be searched. Default is 'filtered_feature'. -#' @param min.cells An integer value specifying the minimum number of cells. Default is 5. -#' @param min.features An integer value specifying the minimum number of features. Default is 200. -#' @param updateHGNC A logical value indicating whether to update the HGNC. Default is TRUE. -#' @param ShowStats A logical value indicating whether to show statistics. Default is TRUE. -#' @param writeCBCtable A logical value indicating whether to write out a list of cell barcodes (CBC) as a tsv file. Default is TRUE. -#' @param depth An integer value specifying the depth of scan (i.e., how many levels below the InputDir). Default is 2. -#' @param sample.barcoding A logical value indicating whether Cell Ranger was run with sample barcoding. Default is FALSE. -#' @param sort_alphanumeric sort files alphanumeric? Default: TRUE. +#' @param regex A logical value. If TRUE, the folderPattern is treated as a regular expression. Default: `FALSE`. +#' @param folderPattern A character vector specifying the pattern of folder names to be searched. Default: 'filtered_feature'. +#' @param min.cells An integer value specifying the minimum number of cells. Default: 5. +#' @param min.features An integer value specifying the minimum number of features. Default: 200. +#' @param updateHGNC A logical value indicating whether to update the HGNC. Default: `TRUE`. +#' @param ShowStats A logical value indicating whether to show statistics. Default: `TRUE`. +#' @param writeCBCtable A logical value indicating whether to write out a list of cell barcodes (CBC) as a tsv file. Default: `TRUE`. +#' @param depth An integer value specifying the depth of scan (i.e., how many levels below the InputDir). Default: 2. +#' @param sample.barcoding A logical value indicating whether Cell Ranger was run with sample barcoding. Default: `FALSE`. +#' @param sort_alphanumeric sort files alphanumeric? Default: `TRUE`. #' @examples #' \dontrun{ #' if (interactive()) Convert10Xfolders(InputDir) @@ -70,7 +71,7 @@ Convert10Xfolders_v1 <- function( print("") print(fnameIN) - count_matrix <- Read10X(pathIN ) + count_matrix <- Read10X(pathIN) if (!is.list(count_matrix) | length(count_matrix) == 1) { seu <- CreateSeuratObject( counts = count_matrix, project = fnameIN, @@ -125,13 +126,13 @@ Convert10Xfolders_v1 <- function( #' @description Plot a UMAP and tSNE side by side. #' @param obj Seurat object. Default: combined.obj #' @param grouping Variable to group cells by. Default: 'res.0.6' -#' @param no_legend Logical, whether to display legend. Default: FALSE -#' @param do_return Logical, whether to return plot object. Default: TRUE -#' @param do_label Logical, whether to display labels. Default: TRUE +#' @param no_legend Logical, whether to display legend. Default: `FALSE`. +#' @param do_return Logical, whether to return plot object. Default: `TRUE`. +#' @param do_label Logical, whether to display labels. Default: `TRUE`. #' @param label_size Size of labels. Default: 10 -#' @param vector_friendly Logical, whether to optimize for vector outputs. Default: TRUE +#' @param vector_friendly Logical, whether to optimize for vector outputs. Default: `TRUE`. #' @param cells_use A vector of cell names to use for the plot. Default: NULL -#' @param no_axes Logical, whether to hide axes. Default: TRUE +#' @param no_axes Logical, whether to hide axes. Default: `TRUE`. #' @param pt_size Size of points. Default: 0.5 #' @param name.suffix Suffix to append to the plot's name. Default: NULL #' @param width Width of the plot. Default: hA4 @@ -343,7 +344,7 @@ umapNamedClusters <- function(obj = combined.obj, #' @description Relabel cluster numbers along the principal curve of 2 UMAP (or tSNE) dimensions. # #' @param obj Seurat object, Default: combined.obj #' @param dim Dimensions to use, Default: 1:2 -#' @param plotit Plot results (& show it), Default: TRUE +#' @param plotit Plot results (& show it), Default: `TRUE`. #' @param swap Swap Lambda paramter (multiplied with this) , Default: -1 #' @param reduction UMAP, tSNE, or PCA (Dim. reduction to use), Default: 'umap' #' @param res Clustering resoluton to use, Default: 'integrated_snn_res.0.5' @@ -411,18 +412,18 @@ AutoNumber.by.PrinCurve <- function( #' @param dataDir A string specifying the directory that contains the 10X Genomics output folders. #' This directory should include subdirectories for raw and filtered data, typically named starting with #' `raw_` and `filt_`, respectively. -#' @param cellIDs An optional vector of cell IDs to include in the loaded data. Default is `NULL`, +#' @param cellIDs An optional vector of cell IDs to include in the loaded data. Default: `NULL`, #' indicating that all available cells will be included. This is useful for subsetting the data based #' on specific cell IDs. #' @param channelName An optional string specifying the channel name for the data being loaded. -#' This can be used to label the data according to the experimental condition or sample name. Default is `NULL`. +#' This can be used to label the data according to the experimental condition or sample name. Default: `NULL`. #' @param readArgs A list of additional arguments to pass to the internal `Read10X` function used for -#' loading the data. Default is an empty list. +#' loading the data. Default: an empty list. #' @param includeFeatures A character vector specifying which features to include in the loaded data. #' Common values include "Gene Expression", "Antibody Capture", and "CRISPR Guide Capture". -#' Default is `c("Gene Expression")`. +#' Default: `c("Gene Expression")`. #' @param verbose A logical flag indicating whether to print progress messages and status updates as the -#' data is loaded. Default is `TRUE`. +#' data is loaded. Default: `TRUE`. #' @param ... Additional arguments passed to other internally called functions, if applicable. #' @return An object of class `SoupChannel`, representing the loaded 10X data. This object includes #' raw counts, filtered counts, and optionally, additional metadata and dimensionality reduction coordinates @@ -445,7 +446,6 @@ AutoNumber.by.PrinCurve <- function( #' @seealso \code{\link[SoupX]{SoupChannel}} for the structure and utilities of the `SoupChannel` class. #' #' @export -#' @importFrom SoupX SoupChannel load10Xv3 <- function(dataDir, cellIDs = NULL, channelName = NULL, readArgs = list(), includeFeatures = c("Gene Expression"), verbose = TRUE, ...) { @@ -543,7 +543,7 @@ load10Xv3 <- function(dataDir, cellIDs = NULL, channelName = NULL, readArgs = li ) } - "Maybe the one below should be within the above if statement?" + stopifnot("Package 'SoupX' must be installed to use this function." = require("SoupX")) channel <- SoupX::SoupChannel( tod = dat, toc = datCells, metaData = mDat, channelName = channelName, dataDir = dataDir, dataType = "10X", @@ -555,16 +555,18 @@ load10Xv3 <- function(dataDir, cellIDs = NULL, channelName = NULL, readArgs = li + + # _________________________________________________________________________________________________ #' @title Convert10Xfolders.old #' #' @description This function takes a parent directory with a number of subfolders, each containing the standard output of 10X Cell Ranger. It (1) loads the filtered data matrices, (2) converts them to Seurat objects, and (3) saves them as .RDS files. #' @param InputDir A character string specifying the input directory. -#' @param folderPattern A character vector specifying the pattern of folder names to be searched. Default is 'filtered'. -#' @param min.cells An integer value specifying the minimum number of cells. Default is 10. -#' @param min.features An integer value specifying the minimum number of features. Default is 200. -#' @param updateHGNC A logical value indicating whether to update the HGNC. Default is TRUE. -#' @param ShowStats A logical value indicating whether to show statistics. Default is TRUE. +#' @param folderPattern A character vector specifying the pattern of folder names to be searched. Default: 'filtered'. +#' @param min.cells An integer value specifying the minimum number of cells. Default: 10. +#' @param min.features An integer value specifying the minimum number of features. Default: 200. +#' @param updateHGNC A logical value indicating whether to update the HGNC. Default: `TRUE`. +#' @param ShowStats A logical value indicating whether to show statistics. Default: `TRUE`. #' @examples #' \dontrun{ #' if (interactive()) { @@ -647,8 +649,10 @@ removeScaleData <- function(ls.obj) { #' @description This function removes layers from a Seurat object's RNA assay based on a specified regular expression pattern. #' It first backs up the object before removing layers that match the pattern. #' -#' @param seuratObj A Seurat object. +#' @param obj A Seurat object. #' @param pattern A regular expression pattern to match layer names. +#' @param perl A logical value indicating whether to use Perl-compatible regular expressions. +#' Default: `TRUE`. #' #' @importFrom CodeAndRoll2 grepv #' @return A Seurat object with specified layers removed. @@ -676,7 +680,7 @@ get.clustercomposition <- function(...) .Deprecated("No longer provided.") multiFeatureHeatmap.A4 <- function(...) .Deprecated("No longer provided.") Annotate4Plotly3D <- function(...) .Deprecated(".Annotate4Plotly3D() - with dot/invisible.") Percent.in.Trome <- function(...) .Deprecated("PercentInTranscriptome()") -.parseRegressionVariablesForScaleData <- function(...) .Deprecated(".getRegressionVariablesForScaleData()") +.parseRegressionVariablesForScaleData <- function(...) .Deprecated(".getRegressionVariablesForScaleData()") seu.add.meta.from.vector <- function(...) .Deprecated("addMetaDataSafe()") # _________________________________________________________________________________________________ @@ -725,16 +729,16 @@ Create.MiscSlot <- function(obj, NewSlotName = "UVI.tables", SubSlotName = NULL) #' and finding clusters. It optionally performs t-SNE and saves the object. #' #' @param obj The Seurat object. -#' @param n.var.features The number of variable features to use. Default is the 'n.var.genes' element from a list 'p'. -#' @param features.scale A logical value indicating whether to scale the data. Default is TRUE. +#' @param n.var.features The number of variable features to use. Default: the 'n.var.genes' element from a list 'p'. +#' @param features.scale A logical value indicating whether to scale the data. Default: `TRUE`. #' @param vars.to.regress A vector of variable names to be regressed out. #' @param suffix A character string to be used as a suffix when saving the object. -#' @param nPCs The number of principal components to use. Default is the 'n.PC' element from a list 'p'. -#' @param clust_resolutions The resolution for clustering. Default is the 'snn_res' element from a list 'p'. -#' @param calc_tSNE Logical, if TRUE, t-SNE will be performed. Default is FALSE. -#' @param plot_umaps Logical, if TRUE, UMAP plots will be generated. Default is TRUE. -#' @param save_obj Logical, if TRUE, the object will be saved. Default is TRUE. -#' @param assayX The assay to be used in scaling data. Default is 'RNA'. +#' @param nPCs The number of principal components to use. Default: the 'n.PC' element from a list 'p'. +#' @param clust_resolutions The resolution for clustering. Default: the 'snn_res' element from a list 'p'. +#' @param calc_tSNE Logical, if TRUE, t-SNE will be performed. Default: `FALSE`. +#' @param plot_umaps Logical, if TRUE, UMAP plots will be generated. Default: `TRUE`. +#' @param save_obj Logical, if TRUE, the object will be saved. Default: `TRUE`. +#' @param assayX The assay to be used in scaling data. Default: 'RNA'. #' @return Seurat object after calculations and manipulations. #' @importFrom Seurat FindVariableFeatures ScaleData RunPCA FindNeighbors FindClusters RunTSNE #' @importFrom MarkdownReports create_set_OutDir @@ -851,9 +855,9 @@ PrctCellExpringGene <- function(genes, group.by = "all", obj = combined.obj, ...) { .Deprecated("PctCellsExpressingGenes") # - nf <- setdiff(genes, c(Features(obj, assay = 'RNA'), colnames(obj@m@data))) + nf <- setdiff(genes, c(Features(obj, assay = "RNA"), colnames(obj@m@data))) - if(length(nf) > 0) message("Some genes/ features not found: ", nf) + if (length(nf) > 0) message("Some genes/ features not found: ", nf) stopifnot("Some genes not foun!." = all(genes %in% Features(obj))) @@ -1095,8 +1099,8 @@ ww.calc_helper <- function(obj, genes, slot = "RNA") { # #' @param splitby Variable to split the data by, typically a project or dataset identifier. # #' Default: 'ShortNames'. # #' @param color Bar color. Default: as defined by `splitby`. -# #' @param plot Whether to display the plot. Default: TRUE. -# #' @param ScaleTo100pc Whether to scale Y axis to 100%. Default: TRUE. +# #' @param plot Whether to display the plot. Default: `TRUE`. +# #' @param ScaleTo100pc Whether to scale Y axis to 100%. Default: `TRUE`. # #' @param ... Additional parameters for plotting functions. # #' # #' @return If `plot` is TRUE, displays a bar plot showing the composition of each cluster. Otherwise, @@ -1152,8 +1156,8 @@ ww.calc_helper <- function(obj, genes, slot = "RNA") { # #' @param splitby Variable to split the data by, typically a project or dataset identifier. # #' Default: 'ShortNames'. # #' @param color Bar color. Default: as defined by `splitby`. -# #' @param plot Whether to display the plot. Default: TRUE. -# #' @param ScaleTo100pc Whether to scale Y axis to 100%. Default: TRUE. +# #' @param plot Whether to display the plot. Default: `TRUE`. +# #' @param ScaleTo100pc Whether to scale Y axis to 100%. Default: `TRUE`. # #' @param ... Additional parameters for plotting functions. # #' # #' @return If `plot` is TRUE, displays a bar plot showing the composition of each cluster. Otherwise, @@ -1210,7 +1214,7 @@ ww.calc_helper <- function(obj, genes, slot = "RNA") { # #' @param group.cells.by Cell grouping variable for the heatmap. Default: 'batch' # #' @param plot.reduction Dimension reduction technique to use for plots. Default: 'umap' # #' @param cex Point size in the plot. Default: iround(3/gene.per.page) -# #' @param sep_scale Logical, whether to scale the features separately. Default: FALSE +# #' @param sep_scale Logical, whether to scale the features separately. Default: `FALSE`. # #' @param gene.min.exp Minimum gene expression level for plotting. Default: 'q5' # #' @param gene.max.exp Maximum gene expression level for plotting. Default: 'q95' # #' @param jpeg.res Resolution of the jpeg output. Default: 225 diff --git a/R/Seurat.utils.less.used.R.bac b/R/Seurat.utils.less.used.R.bac new file mode 100644 index 0000000..22734ee --- /dev/null +++ b/R/Seurat.utils.less.used.R.bac @@ -0,0 +1,1276 @@ +# ____________________________________________________________________ +# Seurat.utils.less.used.R ---- +# ____________________________________________________________________ +# file.edit("~/GitHub/Packages/Seurat.utils/R/Seurat.utils.less.used.R") + + + +# _________________________________________________________________________________________________ +#' @title Convert10Xfolders - legacy version +#' +#' @description This function takes a parent directory with a number of subfolders, each +#' containing the standard output of 10X Cell Ranger. It (1) loads the filtered data matrices, +#' (2) converts them to Seurat objects, and (3) saves them as .RDS files. +#' @param InputDir A character string specifying the input directory. +#' @param regex A logical value. If TRUE, the folderPattern is treated as a regular expression. Default: `FALSE`.. +#' @param folderPattern A character vector specifying the pattern of folder names to be searched. Default: 'filtered_feature'. +#' @param min.cells An integer value specifying the minimum number of cells. Default: 5. +#' @param min.features An integer value specifying the minimum number of features. Default: 200. +#' @param updateHGNC A logical value indicating whether to update the HGNC. Default: `TRUE`.. +#' @param ShowStats A logical value indicating whether to show statistics. Default: `TRUE`.. +#' @param writeCBCtable A logical value indicating whether to write out a list of cell barcodes (CBC) as a tsv file. Default: `TRUE`.. +#' @param depth An integer value specifying the depth of scan (i.e., how many levels below the InputDir). Default: 2. +#' @param sample.barcoding A logical value indicating whether Cell Ranger was run with sample barcoding. Default: `FALSE`.. +#' @param sort_alphanumeric sort files alphanumeric? Default: `TRUE`.. +#' @examples +#' \dontrun{ +#' if (interactive()) Convert10Xfolders(InputDir) +#' } +#' @export +Convert10Xfolders_v1 <- function( + InputDir, + regex = FALSE, + folderPattern = c("filtered_feature", "raw_feature", "SoupX_decont")[1], + depth = 4, + min.cells = 5, min.features = 200, + updateHGNC = TRUE, ShowStats = TRUE, + writeCBCtable = TRUE, + sample.barcoding = FALSE, + nthreads = .getNrCores(), + preset = "high", + ext = "qs", + sort_alphanumeric = TRUE, + ...) { + warning("Since v2.5.0, the output is saved in the more effcient qs format! See qs package.", immediate. = TRUE) + + finOrig <- ReplaceRepeatedSlashes(list.dirs.depth.n(InputDir, depth = depth)) + fin <- CodeAndRoll2::grepv(x = finOrig, pattern = folderPattern, perl = regex) + + iprint(length(fin), "samples found.") + + samples <- basename(list.dirs(InputDir, recursive = FALSE)) + if (sort_alphanumeric) samples <- gtools::mixedsort(samples) + iprint("Samples:", samples) + + if (!length(fin) > 0) { + stop(paste("No subfolders found with pattern", folderPattern, "in dirs like: ", finOrig[1:3])) + } + + for (i in 1:length(fin)) { + print(i) + pathIN <- Stringendo::FixPath(fin[i]) + print(pathIN) + + # sample.barcoding --- --- --- + fnameIN <- if (sample.barcoding) { + samples[i] + } else { + basename(dirname(dirname(pathIN))) + } + print("") + print(fnameIN) + + count_matrix <- Read10X(pathIN ) + if (!is.list(count_matrix) | length(count_matrix) == 1) { + seu <- CreateSeuratObject( + counts = count_matrix, project = fnameIN, + min.cells = min.cells, min.features = min.features + ) + } else if (is.list(count_matrix) & length(count_matrix) == 2) { + seu <- CreateSeuratObject( + counts = count_matrix[[1]], project = fnameIN, + min.cells = min.cells, min.features = min.features + ) + + # LSB, Lipid Sample barcode (Multi-seq) --- --- --- --- --- --- + LSB <- CreateSeuratObject(counts = count_matrix[[2]], project = fnameIN) + + LSBnameOUT <- ppp(paste0(InputDir, "/LSB.", fnameIN), "qs") + qs::qsave(x = LSB, file = LSBnameOUT) + } else { + print("More than 2 elements in the list of matrices") + } + + ncells <- ncol(seu) + fname_X <- Stringendo::sppp( + fnameIN, "min.cells", min.cells, "min.features", min.features, + "cells", ncells + ) + print(fname_X) + + f.path.out <- Stringendo::ParseFullFilePath(path = InputDir, file_name = fname_X, extension = ext) + message(f.path.out) + + # update --- --- --- + if (updateHGNC) seu <- UpdateGenesSeurat(seu, EnforceUnique = TRUE, ShowStats = TRUE) + + # write out --- --- --- + qs::qsave(x = seu, file = f.path.out, nthreads = nthreads, preset = preset) + + # write cellIDs --- --- --- + if (writeCBCtable) { + CBCs <- t(t(colnames(seu))) + colnames(CBCs) <- "CBC" + ReadWriter::write.simple.tsv(input_df = CBCs, manual_file_name = sppp(fnameIN, "CBC"), manual_directory = InputDir) + } + } # for +} + + + + +# ____________________________________________________________________________________ +#' @title plot.UMAP.tSNE.sidebyside +#' +#' @description Plot a UMAP and tSNE side by side. +#' @param obj Seurat object. Default: combined.obj +#' @param grouping Variable to group cells by. Default: 'res.0.6' +#' @param no_legend Logical, whether to display legend. Default: `FALSE`. +#' @param do_return Logical, whether to return plot object. Default: `TRUE`. +#' @param do_label Logical, whether to display labels. Default: `TRUE`. +#' @param label_size Size of labels. Default: 10 +#' @param vector_friendly Logical, whether to optimize for vector outputs. Default: `TRUE`. +#' @param cells_use A vector of cell names to use for the plot. Default: NULL +#' @param no_axes Logical, whether to hide axes. Default: `TRUE`. +#' @param pt_size Size of points. Default: 0.5 +#' @param name.suffix Suffix to append to the plot's name. Default: NULL +#' @param width Width of the plot. Default: hA4 +#' @param heigth Height of the plot. Default: 1.75 * wA4 +#' @param filetype Filetype to save plot as. Default: 'pdf' +#' @param ... Pass any other parameter to the internally called functions (most of them should work). +#' @seealso +#' \code{\link[cowplot]{save_plot}} +#' @importFrom cowplot save_plot plot_grid +#' +#' @export plot.UMAP.tSNE.sidebyside +plot.UMAP.tSNE.sidebyside <- function(obj = combined.obj, grouping = "res.0.6", # Plot a UMAP and tSNE sidebyside + no_legend = FALSE, + do_return = TRUE, + do_label = TRUE, + label_size = 10, + vector_friendly = TRUE, + cells_use = NULL, + no_axes = TRUE, + pt_size = 0.5, + name.suffix = NULL, + width = hA4, heigth = 1.75 * wA4, filetype = "pdf", ...) { + p1 <- Seurat::DimPlot( + object = obj, reduction.use = "tsne", no.axes = no_axes, cells.use = cells_use, + no.legend = no_legend, do.return = do_return, do.label = do_label, label.size = label_size, + group.by = grouping, vector.friendly = vector_friendly, pt.size = pt_size, ... + ) + + ggtitle("tSNE") + theme(plot.title = element_text(hjust = 0.5)) + + p2 <- Seurat::DimPlot( + object = obj, reduction.use = "umap", no.axes = no_axes, cells.use = cells_use, + no.legend = TRUE, do.return = do_return, do.label = do_label, label.size = label_size, + group.by = grouping, vector.friendly = vector_friendly, pt.size = pt_size, ... + ) + + ggtitle("UMAP") + theme(plot.title = element_text(hjust = 0.5)) + + plots <- cowplot::plot_grid(p1, p2, labels = c("A", "B"), ncol = 2) + plotname <- kpp("UMAP.tSNE", grouping, name.suffix, filetype) + + cowplot::save_plot( + filename = plotname, plot = plots, + ncol = 2 # we're saving a grid plot of 2 columns + , nrow = 1 # and 2 rows + , base_width = width, + base_height = heigth + # each individual subplot should have an aspect ratio of 1.3 + # , base_aspect_ratio = 1.5 + ) +} + + +# _________________________________________________________________________________________________ +#' @title Plot multiple categorical variables in combined UMAPs +#' +#' @description Generates and saves multiple UMAP plots for clustering results, adjusting the +#' layout and plot dimensions. Supports the generation of plots in different +#' formats and customization of the visual appearance. +#' +#' @param idents A vector of cluster identities to plot. Default: `GetClusteringRuns()[1:4]`. +#' @param obj The Seurat object containing clustering information. Default: `combined.obj`. +#' @param foldername The name of the folder to save plots. Default: `substitute(ident)`. +#' @param plot.reduction The dimensionality reduction technique to use for plotting. Default: "umap". +#' @param intersectionAssay The assay to use for intersection. Default: "RNA". +#' @param layout The layout orientation, either "tall", "wide", or `FALSE` to disable. Default: "wide". +#' @param nr.Col Number of columns in the plot grid. Default: 2. +#' @param nr.Row Number of rows in the plot grid. Default: 4. +#' @param cex The character expansion size for plot text, automatically adjusted. Default: `round(0.1 / (nr.Col * nr.Row), digits = 2)`. +#' @param label Logical indicating if labels should be displayed on the plots. Default: `FALSE`. +#' @param legend Logical indicating if a legend should be included in the plots. Default: `!label`. +#' @param subdir Logical indicating if a subdirectory should be created for saving plots. Default: `TRUE`. +#' @param prefix Optional prefix for plot filenames. Default: `NULL`. +#' @param suffix Optional suffix for plot filenames. Default: `NULL`. +#' @param background_col The background color of the plot. Default: "white". +#' @param aspect.ratio The aspect ratio of the plot, `FALSE` to disable fixed ratio. Default: 0.6. +#' @param saveGeneList Logical indicating if a list of genes should be saved. Default: `FALSE`. +#' @param w The width of the plot in inches. Default: `8.27`. +#' @param h The height of the plot in inches. Default: `11.69`. +#' @param scaling The scaling factor to apply to plot dimensions. Default: 1. +#' @param format The file format for saving plots. Default: "jpg". +#' @param ... Additional arguments passed to plotting functions. +#' +#' @return Invisible `NULL`. Plots are saved to files. +#' @examples +#' \dontrun{ +#' multi_clUMAP.A4(idents = c("S1", "S2"), obj = YourSeuratObject) +#' } +#' @export + +multi_clUMAP.A4 <- function( + obj = combined.obj, + idents = GetClusteringRuns(obj)[1:4], + foldername = "clUMAPs_multi", + plot.reduction = "umap", + intersectionAssay = c("RNA", "integrated")[1], + layout = c("tall", "wide", FALSE)[2], + # colors = c("grey", "red"), + nr.Col = 2, nr.Row = 4, + cex = round(0.1 / (nr.Col * nr.Row), digits = 2), + label = FALSE, # can be a vector of length idents + legend = !label, + subdir = TRUE, + prefix = NULL, suffix = NULL, + background_col = "white", + aspect.ratio = c(FALSE, 0.6)[2], + saveGeneList = FALSE, + w = 8.27, h = 11.69, scaling = 1, + format = c("jpg", "pdf", "png")[1], + ...) { + .Deprecated("qClusteringUMAPS") + message("multi_clUMAP.A4() is kept because it can plot more than 4 resolutions, inti a subfolder.") + + tictoc::tic() + ParentDir <- OutDir + if (is.null(foldername)) foldername <- "clusters" + if (subdir) create_set_SubDir(paste0(foldername, "-", plot.reduction), "/") + + DefaultAssay(obj) <- intersectionAssay + + # Adjust plot dimensions and grid layout based on specified layout + .adjustLayout(layout, scaling, wA4 = 8.27, hA4 = 11.69, environment()) + + + # Split clusters into lists for plotting + ls.idents <- CodeAndRoll2::split_vec_to_list_by_N(1:length(idents), by = nr.Row * nr.Col) + for (i in 1:length(ls.idents)) { + idents_on_this_page <- idents[ls.idents[[i]]] + iprint("page:", i, "| idents", kppc(idents_on_this_page)) + (plotname <- kpp(c(prefix, plot.reduction, i, "idents", ls.idents[[i]], suffix, format))) + + plot.list <- list() + for (i in seq(idents_on_this_page)) { + # browser() + if (length(label) == 1) { + label_X <- label + legend_X <- legend + } else { + label_X <- label[i] + legend_X <- legend[i] + } + + ident_X <- idents_on_this_page[i] + imessage("plotting:", ident_X) + plot.list[[i]] <- clUMAP( + ident = ident_X, obj = obj, plotname = label_X, + label = label_X, legend = legend_X, save.plot = FALSE, h = h, w = w, ... + ) + } + + # Customize plot appearance + for (i in 1:length(plot.list)) { + plot.list[[i]] <- plot.list[[i]] + NoAxes() + if (aspect.ratio) plot.list[[i]] <- plot.list[[i]] + ggplot2::coord_fixed(ratio = aspect.ratio) + } + + # Save plots + pltGrid <- cowplot::plot_grid(plotlist = plot.list, ncol = nr.Col, nrow = nr.Row) + cowplot::ggsave2(filename = plotname, width = w, height = h, bg = background_col, plot = pltGrid) + } # for ls.idents + + if (subdir) MarkdownReports::create_set_OutDir(ParentDir) + tictoc::toc() +} + + + +# _________________________________________________________________________________________________ +#' @title Plot and Save UMAP without legend +#' +#' @description Generates a UMAP plot colored by a specified metadata column and saves the plot to a file. +#' +#' @param obj Seurat object to be visualized; Default: `combined.obj`. +#' @param metaD.colname Metadata column name to color the UMAP by; Default: 'metaD.colname.labeled'. +#' @param ext File extension for the saved plot, supports 'png', 'pdf', etc.; Default: 'png'. +#' @param ... Additional arguments passed to Seurat's `DimPlot`. +#' +#' @return Displays a UMAP plot and saves it to the current working directory. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' umapNamedClusters(obj = combined.obj, metaD.colname = "metaD.colname.labeled") +#' } +#' } +#' +#' @export +#' @importFrom Seurat DimPlot +#' @importFrom ggplot2 ggtitle +#' @importFrom cowplot save_plot + +umapNamedClusters <- function(obj = combined.obj, + metaD.colname = metaD.colname.labeled, + ext = "png", ...) { + warning("This function is deprecated. No support.") + fname <- ppp("Named.clusters", metaD.colname, ext) + p.named <- + Seurat::DimPlot(obj, reduction = "umap", group.by = metaD.colname, label = TRUE, ...) + + NoLegend() + + ggtitle(metaD.colname) + save_plot(p.named, filename = fname) + p.named +} + + + +# _________________________________________________________________________________________________ +#' @title AutoNumber.by.PrinCurve +#' +#' @description Relabel cluster numbers along the principal curve of 2 UMAP (or tSNE) dimensions. # +#' @param obj Seurat object, Default: combined.obj +#' @param dim Dimensions to use, Default: 1:2 +#' @param plotit Plot results (& show it), Default: `TRUE`. +#' @param swap Swap Lambda paramter (multiplied with this) , Default: -1 +#' @param reduction UMAP, tSNE, or PCA (Dim. reduction to use), Default: 'umap' +#' @param res Clustering resoluton to use, Default: 'integrated_snn_res.0.5' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' DimPlot.ClusterNames(ident = "integrated_snn_res.0.5") +#' combined.obj <- AutoNumber.by.PrinCurve( +#' obj = combined.obj, dim = 1:2, reduction = "umap", plotit = TRUE, +#' swap = -1, res = "integrated_snn_res.0.5" +#' ) +#' DimPlot.ClusterNames(ident = "integrated_snn_res.0.5.prin.curve") +#' } +#' } +#' @seealso +#' \code{\link[princurve]{principal_curve}} +#' @importFrom princurve principal_curve whiskers +#' @importFrom MarkdownReports wplot_save_this +#' @importFrom Seurat FetchData +#' +#' @export +AutoNumber.by.PrinCurve <- function( + obj = combined.obj # Relabel cluster numbers along the principal curve of 2 UMAP (or tSNE) dimensions. + , dim = 1:2, plotit = TRUE, swap = -1, + reduction = "umap", res = "integrated_snn_res.0.5") { + # require(princurve) + dim_name <- ppu(toupper(reduction), dim) + coord.umap <- FetchData(object = obj, vars = dim_name) + fit <- princurve::principal_curve(x = as.matrix(coord.umap)) + if (plotit) { + plot(fit, + xlim = range(coord.umap[, 1]), ylim = range(coord.umap[, 2]), + main = "principal_curve" + ) + # points(fit) + points(coord.umap, pch = 18, cex = .25) + princurve::whiskers(coord.umap, fit$s, lwd = .1) + MarkdownReports::wplot_save_this(plotname = "principal_curve") + } + + ls.perCl <- split(swap * fit$lambda, f = obj[[res]]) + MedianClusterCoordinate <- unlapply(ls.perCl, median) + OldLabel <- names(sort(MedianClusterCoordinate)) + NewLabel <- as.character(0:(length(MedianClusterCoordinate) - 1)) + NewMeta <- translate(vec = obj[[res]], old = OldLabel, new = NewLabel) + NewMetaCol <- kpp(res, "prin.curve") + iprint("NewMetaCol:", NewMetaCol) + obj[[NewMetaCol]] <- NewMeta + return(obj) +} + + + +# _________________________________________________________________________________________________ +# Read and Write Seurat Objects ---- +# _________________________________________________________________________________________________ + + + +#' @title Load 10X Genomics Version 3 Data +#' +#' @description Loads 10X Genomics data from a specified directory containing output folders for raw and filtered data. +#' This function is designed to handle data from 10X Genomics Chromium Single Cell technologies (version 3). +#' +#' @param dataDir A string specifying the directory that contains the 10X Genomics output folders. +#' This directory should include subdirectories for raw and filtered data, typically named starting with +#' `raw_` and `filt_`, respectively. +#' @param cellIDs An optional vector of cell IDs to include in the loaded data. Default: `NULL`, +#' indicating that all available cells will be included. This is useful for subsetting the data based +#' on specific cell IDs. +#' @param channelName An optional string specifying the channel name for the data being loaded. +#' This can be used to label the data according to the experimental condition or sample name. Default: `NULL`. +#' @param readArgs A list of additional arguments to pass to the internal `Read10X` function used for +#' loading the data. Default: an empty list. +#' @param includeFeatures A character vector specifying which features to include in the loaded data. +#' Common values include "Gene Expression", "Antibody Capture", and "CRISPR Guide Capture". +#' Default: `c("Gene Expression")`. +#' @param verbose A logical flag indicating whether to print progress messages and status updates as the +#' data is loaded. Default: `TRUE`. +#' @param ... Additional arguments passed to other internally called functions, if applicable. +#' @return An object of class `SoupChannel`, representing the loaded 10X data. This object includes +#' raw counts, filtered counts, and optionally, additional metadata and dimensionality reduction coordinates +#' (e.g., t-SNE). +#' +#' @details This function provides a comprehensive approach to loading and organizing 10X Genomics data +#' for further analysis. It accommodates the data structure commonly found in 10X Genomics version 3 outputs +#' and allows for the inclusion of various types of molecular data as well as optional cell and channel +#' specifications. +#' +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' # Assuming `dataDir` is the path to your 10X data directory +#' channel <- load10Xv3(dataDir = "path/to/10X/data") +#' # Now `channel` contains the loaded 10X data as a `SoupChannel` object +#' } +#' } +#' +#' @seealso \code{\link[SoupX]{SoupChannel}} for the structure and utilities of the `SoupChannel` class. +#' +#' @export +#' @importFrom SoupX SoupChannel +load10Xv3 <- function(dataDir, cellIDs = NULL, channelName = NULL, readArgs = list(), + includeFeatures = c("Gene Expression"), verbose = TRUE, + ...) { + # include + dirz <- list.dirs(dataDir, full.names = FALSE, recursive = FALSE) + path.raw <- file.path(dataDir, grep(x = dirz, pattern = "^raw_*", value = TRUE)) + path.filt <- file.path(dataDir, grep(x = dirz, pattern = "^filt_*", value = TRUE)) + CR.matrices <- list.fromNames(c("raw", "filt")) + + + (isV3 <- any(grepl(x = dirz, pattern = "^raw_feature_bc*"))) + tgt <- path.raw + + if (!isV3) { + tgt <- file.path(tgt, list.files(tgt)) + } + if (verbose) { + message(sprintf("Loading raw count data")) + } + dat <- do.call(Read10X, c(list(data.dir = tgt), readArgs)) + if (verbose) { + message(sprintf("Loading cell-only count data")) + } + if (!is.null(cellIDs)) { + if (all(grepl("\\-1$", cellIDs))) { + cellIDs <- gsub("\\-1$", "", cellIDs) + } + if (!all(cellIDs %in% colnames(dat))) { + stop("Not all supplied cellIDs found in raw data.") + } + datCells <- dat[, match(cellIDs, colnames(dat))] + } else { + tgt <- path.filt + if (!isV3) { + tgt <- file.path(tgt, list.files(tgt)) + } + datCells <- do.call(Read10X, c( + list(data.dir = tgt), + readArgs + )) + if (is.list(dat)) { + dat <- do.call(rbind, dat[includeFeatures]) + datCells <- do.call(rbind, datCells[includeFeatures]) + } + } + if (verbose) { + message(sprintf("Loading extra analysis data where available")) + } + mDat <- NULL + tgt <- file.path( + dataDir, "analysis", "clustering", "graphclust", + "clusters.csv" + ) + if (file.exists(tgt)) { + clusters <- read.csv(tgt) + mDat <- data.frame(clusters = clusters$Cluster, row.names = clusters$Barcode) + } + tgt <- file.path( + dataDir, "analysis", "clustering", "kmeans_10_clusters", + "clusters.csv" + ) + if (file.exists(tgt)) { + clusters <- read.csv(tgt) + mDat$clustersFine <- clusters$Cluster + } + tgt <- file.path( + dataDir, "analysis", "tsne", "2_components", + "projection.csv" + ) + + if (file.exists(tgt)) { + tsne <- read.csv(tgt) + if (is.null(mDat)) { + mDat <- data.frame( + tSNE1 = tsne$TSNE.1, tSNE2 = tsne$TSNE.2, + row.names = tsne$Barcode + ) + } else { + mDat$tSNE1 <- tsne$TSNE.1[match(rownames(mDat), tsne$Barcode)] + mDat$tSNE2 <- tsne$TSNE.2[match(rownames(mDat), tsne$Barcode)] + } + DR <- c("tSNE1", "tSNE2") + } else { + DR <- NULL + } + if (!is.null(mDat) && any(rownames(mDat) != colnames(datCells))) { + rownames(mDat) <- gsub("-1$", "", rownames(mDat)) + if (any(rownames(mDat) != colnames(datCells))) { + stop("Error matching meta-data to cell names.") + } + } + if (is.null(channelName)) { + channelName <- ifelse(is.null(names(dataDir)), dataDir, + names(dataDir) + ) + } + + "Maybe the one below should be within the above if statement?" + channel <- SoupX::SoupChannel( + tod = dat, toc = datCells, metaData = mDat, + channelName = channelName, dataDir = dataDir, dataType = "10X", + isV3 = isV3, DR = DR, ... + ) + return(channel) +} + + + + +# _________________________________________________________________________________________________ +#' @title Convert10Xfolders.old +#' +#' @description This function takes a parent directory with a number of subfolders, each containing the standard output of 10X Cell Ranger. It (1) loads the filtered data matrices, (2) converts them to Seurat objects, and (3) saves them as .RDS files. +#' @param InputDir A character string specifying the input directory. +#' @param folderPattern A character vector specifying the pattern of folder names to be searched. Default: 'filtered'. +#' @param min.cells An integer value specifying the minimum number of cells. Default: 10. +#' @param min.features An integer value specifying the minimum number of features. Default: 200. +#' @param updateHGNC A logical value indicating whether to update the HGNC. Default: `TRUE`.. +#' @param ShowStats A logical value indicating whether to show statistics. Default: `TRUE`.. +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' Convert10Xfolders.old(InputDir = InputDir) +#' } +#' } +#' @export +Convert10Xfolders.old <- function( + InputDir, + folderPattern = c("filtered", "SoupX_decont")[1], + min.cells = 10, min.features = 200, + updateHGNC = TRUE, ShowStats = TRUE) { + # ... function body ... +} + +Convert10Xfolders.old <- function( + InputDir # Take a parent directory with a number of subfolders, each containing the standard output of 10X Cell Ranger. (1.) It loads the filtered data matrices; (2.) converts them to Seurat objects, and (3.) saves them as *.RDS files. + , folderPattern = c("filtered", "SoupX_decont")[1], + min.cells = 10, min.features = 200, updateHGNC = TRUE, ShowStats = TRUE) { + fin <- list.dirs(InputDir, recursive = FALSE) + fin <- CodeAndRoll2::grepv(x = fin, pattern = folderPattern, perl = FALSE) + + for (i in 1:length(fin)) { + pathIN <- fin[i] + print(pathIN) + fnameIN <- basename(fin[i]) + fnameOUT <- ppp(paste0(InputDir, "/", fnameIN), "min.cells", min.cells, "min.features", min.features, "Rds") + count_matrix <- Read10X(pathIN) + + if (!is.list(count_matrix) | length(count_matrix) == 1) { + seu <- CreateSeuratObject( + counts = count_matrix, project = fnameIN, + min.cells = min.cells, min.features = min.features + ) + } else if (is.list(count_matrix) & length(count_matrix) == 2) { + seu <- CreateSeuratObject( + counts = count_matrix[[1]], project = fnameIN, + min.cells = min.cells, min.features = min.features + ) + + # LSB, Lipid Sample barcode (Multi-seq) --- --- --- --- --- --- + LSB <- CreateSeuratObject(counts = count_matrix[[2]], project = fnameIN) + LSBnameOUT <- ppp(paste0(InputDir, "/LSB.", fnameIN), "Rds") + saveRDS(LSB, file = LSBnameOUT) + } else { + print("More than 2 elements in the list of matrices") + } + # update --- --- --- --- + if (updateHGNC) seu <- UpdateGenesSeurat(seu, EnforceUnique = TRUE, ShowStats = TRUE) + saveRDS(seu, file = fnameOUT) + } +} + + + +# _________________________________________________________________________________________________ +# Layer Removal ---- +# _________________________________________________________________________________________________ + +# _________________________________________________________________________________________________ +#' @title Remove Scale Data from Seurat Objects +#' +#' @param ls.obj A list of Seurat objects. +#' @return A list of Seurat objects with `scale.data` slot removed from RNA assays. +#' @examples +#' # Assuming `seuratList` is a list of Seurat objects +#' seuratList <- removeScaleData(seuratList) +#' @export +removeScaleData <- function(ls.obj) { + lapply(ls.obj, function(x) { + x@assays$RNA@layers$scale.data <- NULL + x + }) +} + + +# _________________________________________________________________________________________________ +#' @title Remove Layers from Seurat Object by Pattern +#' +#' @description This function removes layers from a Seurat object's RNA assay based on a specified regular expression pattern. +#' It first backs up the object before removing layers that match the pattern. +#' +#' @param obj A Seurat object. +#' @param pattern A regular expression pattern to match layer names. +#' @param perl A logical value indicating whether to use Perl-compatible regular expressions. +#' Default: `TRUE`. +#' +#' @importFrom CodeAndRoll2 grepv +#' @return A Seurat object with specified layers removed. +#' @export +removeLayersByPattern <- function(obj, pattern = "sc[0-9][0-9]_", perl = TRUE) { + message(paste("pattern: ", pattern)) + stopifnot("obj must be a Seurat object" = inherits(obj, "Seurat")) + + layerNames <- Layers(obj) + layersToRemove <- CodeAndRoll2::grepv(pattern, x = layerNames, perl = perl) + message(paste(length(layersToRemove), "form", length(layerNames), "layers are removed.")) + obj@assays$RNA@layers[layersToRemove] <- NULL + return(obj) +} + +# _________________________________________________________________________________________________ +# Deprecated ---- +# _________________________________________________________________________________________________ +set.all.genes <- function() .Deprecated("calc.q99.Expression.and.set.all.genes()") +save2umaps.A4 <- function(...) .Deprecated("save2plots.A4()") +save4umaps.A4 <- function(...) .Deprecated("save4plots.A4()") +plotGeneExpHist <- function(...) .Deprecated("plotGeneExprHistAcrossCells()") +geneExpressionLevelPlots <- function(...) .Deprecated("plotGeneExpressionInBackgroundHist()") +get.clustercomposition <- function(...) .Deprecated("No longer provided.") +multiFeatureHeatmap.A4 <- function(...) .Deprecated("No longer provided.") +Annotate4Plotly3D <- function(...) .Deprecated(".Annotate4Plotly3D() - with dot/invisible.") +Percent.in.Trome <- function(...) .Deprecated("PercentInTranscriptome()") +.parseRegressionVariablesForScaleData <- function(...) .Deprecated(".getRegressionVariablesForScaleData()") +seu.add.meta.from.vector <- function(...) .Deprecated("addMetaDataSafe()") + +# _________________________________________________________________________________________________ +# Main script / functions + + +# will it be used? +cellID_to_cellType_v1 <- function(cellIDs, ident, obj = aaa) { + celltypes <- as.named.vector.df(obj@meta.data[, ident], verbose = FALSE) + celltypes[cellIDs] +} + +cellID_to_cellType <- function(cellIDs, ident_w_names) { + ident_w_names[cellIDs] +} + + +# _________________________________________________________________________________________________ +#' @title Create.MiscSlot +#' +#' @description Create a new slot in the 'misc' slot of a Seurat object. +#' @param obj Seurat object +#' @param NewSlotName Name of the new element inside obj@misc. +#' @export + +Create.MiscSlot <- function(obj, NewSlotName = "UVI.tables", SubSlotName = NULL) { + .Deprecated("addToMiscOrToolsSlot") + # if (is.null(obj@misc[[NewSlotName]])) obj@misc[[NewSlotName]] <- list() else iprint(NewSlotName, "already exists in @misc.") + # if (is.null(obj@misc[[NewSlotName]][[SubSlotName]])) obj@misc[[NewSlotName]][[SubSlotName]] <- list() else iprint(SubSlotName, "subslot already exists in @misc$NewSlot.") + return(obj) +} + + + + +# _________________________________________________________________________________________________ +# Archived ---- +# _________________________________________________________________________________________________ + +".Deprecated" + +#' @title Regress Out and Recalculate Seurat +#' +#' @description The function performs a series of calculations and manipulations on a Seurat object, +#' including identifying variable features, scaling data, running PCA, setting up reductions, finding neighbors, +#' and finding clusters. It optionally performs t-SNE and saves the object. +#' +#' @param obj The Seurat object. +#' @param n.var.features The number of variable features to use. Default: the 'n.var.genes' element from a list 'p'. +#' @param features.scale A logical value indicating whether to scale the data. Default: `TRUE`.. +#' @param vars.to.regress A vector of variable names to be regressed out. +#' @param suffix A character string to be used as a suffix when saving the object. +#' @param nPCs The number of principal components to use. Default: the 'n.PC' element from a list 'p'. +#' @param clust_resolutions The resolution for clustering. Default: the 'snn_res' element from a list 'p'. +#' @param calc_tSNE Logical, if TRUE, t-SNE will be performed. Default: `FALSE`.. +#' @param plot_umaps Logical, if TRUE, UMAP plots will be generated. Default: `TRUE`.. +#' @param save_obj Logical, if TRUE, the object will be saved. Default: `TRUE`.. +#' @param assayX The assay to be used in scaling data. Default: 'RNA'. +#' @return Seurat object after calculations and manipulations. +#' @importFrom Seurat FindVariableFeatures ScaleData RunPCA FindNeighbors FindClusters RunTSNE +#' @importFrom MarkdownReports create_set_OutDir +#' @examples +#' \dontrun{ +#' # Assuming 'seurat_obj' is a valid Seurat object and 'vars' is a vector of variable names to be regressed out. +#' result <- regress_out_and_recalculate_seurat(seurat_obj, vars, suffix = "_regressed") +#' } +#' @importFrom tictoc tic toc +#' +#' @export +regress_out_and_recalculate_seurat <- function( + obj, + n.var.features = p$"n.var.genes", # p is a list of parameters, 2000 + features.scale = n.var.features, + vars.to.regress, + suffix, + nPCs = p$"n.PC", + clust_resolutions = p$"snn_res", + calc_tSNE = FALSE, + plot_umaps = TRUE, + save_obj = TRUE, + assayX = "RNA") { + .Deprecated("processSeuratObject") + + tictoc::tic() + print("FindVariableFeatures") + obj <- FindVariableFeatures(obj, mean.function = "FastExpMean", dispersion.function = "FastLogVMR", nfeatures = n.var.features) + tictoc::toc() + + tictoc::tic() + print("calc.q99.Expression.and.set.all.genes") + obj <- calc.q99.Expression.and.set.all.genes(obj = obj, quantileX = .99) + tictoc::toc() + + tictoc::tic() + print("ScaleData") + obj <- ScaleData(obj, assay = assayX, verbose = TRUE, vars.to.regress = vars.to.regress, features = features.scale) + tictoc::toc() + + tictoc::tic() + print("RunPCA") + obj <- RunPCA(obj, npcs = nPCs, verbose = TRUE) + tictoc::toc() + + tictoc::tic() + print("SetupReductionsNtoKdimensions") + obj <- SetupReductionsNtoKdimensions(obj = obj, nPCs = nPCs, dimensions = 3:2, reduction = "umap") + tictoc::toc() + + tictoc::tic() + print("FindNeighbors") + obj <- FindNeighbors(obj, reduction = "pca", dims = 1:nPCs) + tictoc::toc() + + tictoc::tic() + print("FindClusters") + obj <- FindClusters(obj, resolution = clust_resolutions) + tictoc::toc() + + if (calc_tSNE) { + tictoc::tic() + print("RunTSNE") + obj <- RunTSNE(obj, reduction = "pca", dims = 1:nPCs) + tictoc::toc() + } + + # orig.dir <- getwd() + # new_path <- FixPath(orig.dir, suffix) + # MarkdownReports::create_set_OutDir(new_path) + + clz <- GetClusteringRuns(obj, pat = "*snn_res.*[0-9]$") + + if (plot_umaps) { + print("Plotting umaps") + for (v in clz) clUMAP(ident = v, obj = obj, sub = suffix) + + # MarkdownReports::create_set_OutDir(new_path, 'UMAP_stats') + for (v in vars.to.regress) qUMAP(feature = v, obj = obj, sub = suffix) + # MarkdownReports::create_set_OutDir(new_path) + } + + + if (save_obj) { + print("Save RDS") + isave.RDS(obj, suffix = suffix, inOutDir = TRUE) + } + + return(obj) +} + + +# _________________________________________________________________________________________________ +#' @title Proportion of Cells Expressing Given Genes +#' +#' @description Calculates the proportion of cells expressing one or more specified genes. +#' +#' @param genes Character vector of gene names of interest. +#' @param group.by Optional grouping variable for analysis (e.g., cell type). Default: 'all'. +#' @param obj Seurat object to analyze. Default: `combined.obj`. +#' @param ... Additional arguments. +#' +#' @return Data frame with genes and their cell expression proportion, optionally grouped. +#' +#' @examples +#' \dontrun{ +#' PrctCellExpringGene(genes = c("LTB", "GNLY"), obj = combined.obj) +#' } +#' +#' @source Adapted from Ryan-Zhu on GitHub. +#' +#' @export +PrctCellExpringGene <- function(genes, group.by = "all", obj = combined.obj, + ...) { + .Deprecated("PctCellsExpressingGenes") + # + nf <- setdiff(genes, c(Features(obj, assay = 'RNA'), colnames(obj@m@data))) + + if(length(nf) > 0) message("Some genes/ features not found: ", nf) + + stopifnot("Some genes not foun!." = all(genes %in% Features(obj))) + + if (group.by == "all") { + prct <- 1:length(genes) + for (i in seq(prct)) prct[i] <- ww.calc_helper(genes = genes[1], obj = obj) + result <- data.frame("Markers" = genes, "Cell_proportion" = prct) + return(result) + } else { + ls.Seurat <- Seurat::SplitObject(object = obj, split.by = group.by) + factors <- names(ls.Seurat) + + # This is a self referencing function, how does this supposed to even work?? + results <- lapply(ls.Seurat, PrctCellExpringGene, genes = genes) + for (i in 1:length(factors)) { + results[[i]]$Feature <- factors[i] + } + combined <- do.call("rbind", results) + return(combined) + } +} + + +# _________________________________________________________________________________________________ +#' @title Helper to calculate Cell Expression Proportion for Gene +#' +#' @description Computes the proportion of cells expressing a specific gene within a Seurat object. +#' +#' @param obj Seurat object with cell data. +#' @param genes Single gene name as a character string. +#' @param slot Slot to use for the analysis. Default: 'RNA'. +#' +#' @return Proportion of cells expressing the gene. Returns `NA` if the gene is not found. +#' +#' @examples +#' \dontrun{ +#' ww.calc_helper(obj = seurat_object, genes = "Gene1") +#' } +#' +#' @source Adapted from Ryan-Zhu on GitHub. +#' +#' @export +ww.calc_helper <- function(obj, genes, slot = "RNA") { + .Deprecated("Unused function.") + # stopifnot("Some genes not found!." = all(genes %in% row.names(obj))) + counts <- obj[[slot]]@counts + ncells <- ncol(counts) + if (genes %in% row.names(counts)) { + sum(counts[genes, ] > 0) / ncells + } else { + return(NA) + } +} + + + +#' # _________________________________________________________________________________________________ +#' #' @title seu.add.meta.from.vector +#' #' +#' #' @description Adds a new metadata column to a Seurat object. +#' #' @param obj A Seurat object to which the new metadata column will be added. Default: combined.obj. +#' #' @param metaD.colname A string specifying the name of the new metadata column. Default: metaD.colname.labeled. +#' #' @param Label.per.cell A vector of labels for each cell, to be added as new metadata. Default: Cl.Label.per.cell. +#' #' @return A Seurat object with the new metadata column added. +#' #' @examples +#' #' \dontrun{ +#' #' if (interactive()) { +#' #' # Example usage: +#' #' combined.obj <- seu.add.meta.from.vector( +#' #' obj = combined.obj, +#' #' metaD.colname = metaD.colname.labeled, +#' #' Label.per.cell = Cl.Label.per.cell +#' #' ) +#' #' } +#' #' } +#' #' @export +#' seu.add.meta.from.vector <- function(obj = combined.obj, metaD.colname, Label.per.cell = Cl.Label.per.cell) { +#' .Deprecated("addMetaDataSafe") +#' obj@meta.data[, metaD.colname] <- Label.per.cell +#' iprint(metaD.colname, "contains the named identitites. Use Idents(combined.obj) = '...'. The names are:", unique(Label.per.cell)) +#' return(obj) +#' } +#' +#' + +# # _________________________________________________________________________________________________ +# sparse.cor4 <- function(x){ +# n <- nrow(x) +# cMeans <- colMeans(x) +# covmat <- (as.matrix(crossprod(x)) - n*tcrossprod(cMeans))/(n-1) +# sdvec <- sqrt(diag(covmat)) +# cormat <- covmat/tcrossprod(sdvec) +# list(cov=covmat,cor=cormat) +# } + + +# # _________________________________________________________________________________________________ +# #' @title Find Specific Files in Specified Subdirectories +# #' +# #' @description This function searches through specified subdirectories within a root directory +# #' to find files that match a specified pattern and returns a character vector with their full paths. +# #' The printed output excludes the root directory part from the paths. +# #' +# #' @param root_dir The root directory. +# #' @param subdir A character vector of subdirectory names within the root directory to be scanned. +# #' @param file_name_pattern The pattern of the file name to search for. +# #' @param recursive Boolean indicating whether to search recursively within subdirectories. +# #' @return A character vector containing the full paths to the located files. +# # #' @importFrom fs dir_ls +# #' @export + +# findBamFilesInSubdirs <- function(root_dir, subdir, file_name_pattern = "possorted_genome_bam.bam", recursive = TRUE) { +# stopifnot(is.character(root_dir), length(root_dir) == 1, dir.exists(root_dir), +# is.character(subdir), all(dir.exists(file.path(root_dir, subdir))), +# is.character(file_name_pattern), length(file_name_pattern) == 1, +# is.logical(recursive)) + +# pattern <- paste0("**/", file_name_pattern) +# paths_to_search <- file.path(root_dir, subdir) +# bams <- c() + +# for (path in paths_to_search) { +# iprint("Searching in:", path) +# found_files <- fs::dir_ls(path, recurse = recursive, glob = pattern, type = "file") +# iprint(length(found_files), "files found.") +# bams <- c(bams, found_files) +# } + +# # Replace root_dir in the paths with an empty string for printing +# bams_print <- gsub(paste0("^", root_dir, "/?"), "", bams) +# iprint(length(bams), bams_print) + +# return(bams) +# } + + +# _________________________________________________________________________________________________ +# VISUALIZATION + +# panelCorPearson <- function(x, y, digits = 2, prefix = "", cex.cor = 2, method = "pearson") { +# usr <- par("usr"); on.exit(par(usr)) +# par(usr = c(0, 1, 0, 1)) +# r <- abs(cor(x, y, method = method, use = "complete.obs")) +# txt <- format(c(r, 0.123456789), digits = digits)[1] +# txt <- paste(prefix, txt, sep = "") +# if (missing(cex.cor)) cex <- 0.8/strwidth(txt) +# +# test <- cor.test(x, y) +# Signif <- symnum(test$p.value, corr = FALSE, na = FALSE, +# cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), +# symbols = c("***", "**", "*", ".", " ")) +# +# text(0.5, 0.5, txt, cex = cex * r) +# text(.8, .8, Signif, cex = cex, col = 2) +# } + +# getDiscretePalette <- function( +# ident.used = GetClusteringRuns()[1], +# obj = combined.obj, +# palette.used = c("alphabet", "alphabet2", "glasbey", "polychrome", "stepped")[1], +# show.colors = FALSE, seed = 1989) { +# +# n.clusters <- nrow(unique(obj[[ident.used]])) +# +# colorz <- Seurat::DiscretePalette(n = n.clusters, palette = palette.used) +# +# if (anyNA(colorz)) { +# +# colorsOK <- colorz[!is.na(colorz)] # Extract non-NA values +# n.colz <- length(colorsOK) +# +# msg <- paste("More categories then present in the palette", n.clusters, "vs." +# , n.colz, "in", palette.used, "-> recycling.") +# warning(msg, immediate. = TRUE) +# +# # Resample non-NA values and replace NA values +# set.seed(seed) +# +# if (n.clusters > 10 * n.colz) { +# colorz <- sample(gplots::rich.colors(n.clusters)) +# } else { +# colorz <- sample(x = colorsOK, size = n.clusters, replace = TRUE) +# } +# +# stopif(anyNA(colorz)) +# +# } +# if (show.colors) MarkdownHelpers::color_check(colorz) +# return(colorz) +# } + + +# _________________________________________________________________________________________________ +# _________________________________________________________________________________________________ +# META + +# transferMetadataV1 <- function(from, to, colname_from, colname_to = colname_from, verbose = TRUE, overwrite = FALSE) { +# +# stopifnot( +# is(from, "Seurat"), is(to, "Seurat"), +# is.character(colname_from), is.character(colname_to), +# "Column not found" = colname_from %in% colnames(from@meta.data), +# "Column already exists" = !(colname_to %in% colnames(to@meta.data)) | overwrite +# ) +# +# # Extract the metadata column to transfer +# data.to.transfer <- data.frame(new.metadata = from[[colname_from]]) +# +# # Check cell overlaps +# cells_in_both <- intersect(colnames(from), colnames(to)) +# cells_only_in_from <- setdiff(colnames(from), colnames(to)) +# cells_only_in_to <- setdiff(colnames(to), colnames(from)) +# +# if (verbose) { +# cat("Number and % of cells matching between objects:", length(cells_in_both), +# "(", sprintf("%.2f%%", length(cells_in_both) / length(colnames(from)) * 100), "of from and", +# sprintf("%.2f%%", length(cells_in_both) / length(colnames(to)) * 100), "of to)\n") +# cat("Number and % of cells only in obj1 (from):", length(cells_only_in_from), +# "(", sprintf("%.2f%%", length(cells_only_in_from) / length(colnames(from)) * 100), ")\n") +# cat("Number and % of cells only in obj2 (to):", length(cells_only_in_to), +# "(", sprintf("%.2f%%", length(cells_only_in_to) / length(colnames(to)) * 100), ")\n") +# } +# +# # Add the metadata to the 2nd obj +# to <- Seurat::AddMetaData(object = to, metadata = data.to.transfer, col.name = colname_to ) +# +# return(to) +# } + +# _________________________________________________________________________________________________ + +# #' @title Cluster Composition Analysis +# #' +# #' @description Analyzes and visualizes the composition of clusters in a Seurat object, indicating +# #' the contribution of different datasets to each cluster. +# #' +# #' @param obj Seurat object to analyze. Default: `combined.obj`. +# #' @param ident Cluster identity resolution to use. Default: 'integrated_snn_res.0.3'. +# #' @param splitby Variable to split the data by, typically a project or dataset identifier. +# #' Default: 'ShortNames'. +# #' @param color Bar color. Default: as defined by `splitby`. +# #' @param plot Whether to display the plot. Default: `TRUE`.. +# #' @param ScaleTo100pc Whether to scale Y axis to 100%. Default: `TRUE`.. +# #' @param ... Additional parameters for plotting functions. +# #' +# #' @return If `plot` is TRUE, displays a bar plot showing the composition of each cluster. Otherwise, +# #' performs the analysis without plotting. +# #' +# #' @examples +# #' get.clustercomposition() +# #' +# #' @export +# #' @importFrom dplyr group_by_ summarise +# #' @importFrom scales percent_format +# get.clustercomposition <- function( +# obj = combined.obj, +# ident = GetClusteringRuns()[1], +# splitby = "orig.ident", +# color = splitby, +# plot = TRUE, ScaleTo100pc = TRUE, +# ...) { +# +# stopifnot(ident %in% colnames(obj@meta.data), +# splitby %in% colnames(obj@meta.data) +# ) +# (df.meta <- obj@meta.data[, c(ident, splitby)]) +# +# try(setwd(OutDir), silent = TRUE) +# +# df.meta |> +# dplyr::group_by_(splitby) |> +# summarise() +# +# categ.per.cluster <- ggbarplot(obj@meta.data, +# x = ident, +# y = splitby, +# color = splitby, +# ... +# ) +# if (ScaleTo100pc) categ.per.cluster <- categ.per.cluster + scale_y_discrete(labels = scales::percent_format()) +# if (plot) categ.per.cluster +# +# # ggExpress::qqSave(categ.per.cluster, ...) +# } + + + +# _________________________________________________________________________________________________ +# #' @title Cluster Composition Analysis +# #' +# #' @description Analyzes and visualizes the composition of clusters in a Seurat object, indicating +# #' the contribution of different datasets to each cluster. +# #' +# #' @param obj Seurat object to analyze. Default: `combined.obj`. +# #' @param ident Cluster identity resolution to use. Default: 'integrated_snn_res.0.3'. +# #' @param splitby Variable to split the data by, typically a project or dataset identifier. +# #' Default: 'ShortNames'. +# #' @param color Bar color. Default: as defined by `splitby`. +# #' @param plot Whether to display the plot. Default: `TRUE`.. +# #' @param ScaleTo100pc Whether to scale Y axis to 100%. Default: `TRUE`.. +# #' @param ... Additional parameters for plotting functions. +# #' +# #' @return If `plot` is TRUE, displays a bar plot showing the composition of each cluster. Otherwise, +# #' performs the analysis without plotting. +# #' +# #' @examples +# #' get.clustercomposition() +# #' +# #' @export +# #' @importFrom dplyr group_by_ summarise +# #' @importFrom scales percent_format +# get.clustercomposition <- function( +# obj = combined.obj, +# ident = GetClusteringRuns()[1], +# splitby = "orig.ident", +# color = splitby, +# plot = TRUE, ScaleTo100pc = TRUE, +# ...) { +# +# stopifnot(ident %in% colnames(obj@meta.data), +# splitby %in% colnames(obj@meta.data) +# ) +# (df.meta <- obj@meta.data[, c(ident, splitby)]) +# +# try(setwd(OutDir), silent = TRUE) +# +# df.meta |> +# dplyr::group_by_(splitby) |> +# summarise() +# +# categ.per.cluster <- ggbarplot(obj@meta.data, +# x = ident, +# y = splitby, +# color = splitby, +# ... +# ) +# if (ScaleTo100pc) categ.per.cluster <- categ.per.cluster + scale_y_discrete(labels = scales::percent_format()) +# if (plot) categ.per.cluster +# +# # ggExpress::qqSave(categ.per.cluster, ...) +# } + + +# # _________________________________________________________________________________________________ +# # Save multiple FeatureHeatmaps from a list of genes on A4 jpeg +# # code for quantile: https://github.com/satijalab/seurat/blob/master/R/plotting_internal.R +# +# #' @title multiFeatureHeatmap.A4 +# #' +# #' @description Save multiple FeatureHeatmaps from a list of genes on A4 jpeg. +# #' @param obj Seurat object, Default: combined.obj +# #' @param list.of.genes A list of genes to plot. No default. +# #' @param gene.per.page Number of genes to plot per page. Default: 5 +# #' @param group.cells.by Cell grouping variable for the heatmap. Default: 'batch' +# #' @param plot.reduction Dimension reduction technique to use for plots. Default: 'umap' +# #' @param cex Point size in the plot. Default: iround(3/gene.per.page) +# #' @param sep_scale Logical, whether to scale the features separately. Default: `FALSE`. +# #' @param gene.min.exp Minimum gene expression level for plotting. Default: 'q5' +# #' @param gene.max.exp Maximum gene expression level for plotting. Default: 'q95' +# #' @param jpeg.res Resolution of the jpeg output. Default: 225 +# #' @param jpeg.q Quality of the jpeg output. Default: 90 +# #' @param ... Pass any other parameter to the internally called functions (most of them should work). +# #' @seealso +# #' \code{\link[tictoc]{tic}} +# #' @importFrom tictoc tic toc +# #' +# #' @export +# multiFeatureHeatmap.A4 <- function( +# obj = combined.obj, +# list.of.genes, gene.per.page = 5, +# group.cells.by = "batch", plot.reduction = "umap", +# cex = iround(3 / gene.per.page), sep_scale = FALSE, +# gene.min.exp = "q5", gene.max.exp = "q95", +# jpeg.res = 225, jpeg.q = 90, +# ...) { +# tictoc::tic() +# list.of.genes <- check.genes(list.of.genes, obj = obj) +# +# lsG <- CodeAndRoll2::split_vec_to_list_by_N(1:length(list.of.genes), by = gene.per.page) +# for (i in 1:length(lsG)) { +# print(i) +# genes <- list.of.genes[lsG[[i]]] +# plotname <- kpp(c("FeatureHeatmap", plot.reduction, i, genes, "jpg")) +# print(plotname) +# jjpegA4(plotname, r = jpeg.res, q = jpeg.q) +# try( +# FeatureHeatmap(obj, +# features.plot = genes, group.by = group.cells.by, +# reduction.use = plot.reduction, do.return = FALSE, +# sep.scale = sep_scale, min.exp = gene.min.exp, max.exp = gene.max.exp, +# pt.size = cex, key.position = "top", ... +# ), +# silent = FALSE +# ) +# try.dev.off() +# } +# tictoc::toc() +# } +# + + +# # _________________________________________________________________________________________________ +# #' @title ww.check.if.3D.reduction.exist +# #' +# #' @description ww.check.if.3D.reduction.exist in backup slot # +# #' @param obj Seurat object, Default: obj +# #' @export +# ww.check.if.3D.reduction.exist <- function(obj = obj) { +# if (!("UMAP_3" %in% colnames(obj@reductions$"umap"))) { +# stopif( +# is.null(obj@misc$reductions.backup$"umap3d"), +# "No 3D umap found in backup slot, @misc$reductions.backup. Run SetupReductionsNtoKdimensions() first." +# ) +# RecallReduction(obj = obj, dim = 3, reduction = "umap") +# } else { # Reduction found in normal UMAP slot +# obj +# } +# } diff --git a/man/AddNewAnnotation.Rd b/man/AddNewAnnotation.Rd index a25067d..a40fa4e 100644 --- a/man/AddNewAnnotation.Rd +++ b/man/AddNewAnnotation.Rd @@ -11,13 +11,13 @@ AddNewAnnotation( ) } \arguments{ -\item{obj}{A Seurat object for which the new annotation is to be created. Default is 'obj'.} +\item{obj}{A Seurat object for which the new annotation is to be created. Default: 'obj'.} \item{source}{A character string specifying the existing metadata column to be used as the -basis for the new annotation. Default is 'RNA_snn_res.0.5'.} +basis for the new annotation. Default: 'RNA_snn_res.0.5'.} \item{named.list.of.identities}{A named list providing the mappings for the new annotation. -Default is 'ls.Subset.ClusterLists'.} +Default: 'ls.Subset.ClusterLists'.} } \value{ A character vector representing the new metadata column. diff --git a/man/AutoLabel.KnownMarkers.Rd b/man/AutoLabel.KnownMarkers.Rd index 517aeb6..9e64fe7 100644 --- a/man/AutoLabel.KnownMarkers.Rd +++ b/man/AutoLabel.KnownMarkers.Rd @@ -19,22 +19,22 @@ AutoLabel.KnownMarkers( ) } \arguments{ -\item{obj}{A Seurat object to work with. Default: combined.obj.} +\item{obj}{A Seurat object to work with. Default: \code{combined.obj}.} \item{topN}{The top 'N' genes to consider. Default: 1.} \item{res}{The clustering resolution to use. Default: 0.5.} \item{KnownMarkers}{A character vector containing known marker genes to be used for annotation. -Default: c("TOP2A", "EOMES", "SLA", "HOPX", "S100B", "DLX6-AS1", "POU5F1", "SALL4", "DDIT4", -"PDK1", "SATB2", "FEZF2").} +Default: \verb{c("TOP2A", "EOMES", "SLA", "HOPX", "S100B", "DLX6-AS1", "POU5F1", "SALL4", "DDIT4",} +\verb{"PDK1", "SATB2", "FEZF2")}.} \item{order.by}{Specifies the column to sort the output tibble by. Default: 'combined.score' (First among "combined.score", "avg_log2FC", "p_val_adj").} \item{df_markers}{The data frame of markers. By default, it is stored under \verb{@misc$df.markers$res...} in the provided Seurat object. -Default: combined.obj@misc$df.markers[\link{paste0("res.", res)}].} +Default: \code{combined.obj@misc$df.markers[[paste0("res.", res)]]}.} } \description{ Creates a new "named identity" column in the metadata of a Seurat object, diff --git a/man/AutoLabelTop.logFC.Rd b/man/AutoLabelTop.logFC.Rd index fb1f28d..7246de0 100644 --- a/man/AutoLabelTop.logFC.Rd +++ b/man/AutoLabelTop.logFC.Rd @@ -23,7 +23,7 @@ AutoLabelTop.logFC( \item{group.by}{The clustering group to be used, defaults to the first entry by \code{GetClusteringRuns()}.} -\item{res}{Clustering resolution tag. Default is extracted from \code{group.by}.} +\item{res}{Clustering resolution tag. Default: extracted from \code{group.by}.} \item{plot.top.genes}{Logical indicating whether to show a plot, default is \code{TRUE}.} diff --git a/man/AutoNumber.by.PrinCurve.Rd b/man/AutoNumber.by.PrinCurve.Rd index 7a67883..1604ed5 100644 --- a/man/AutoNumber.by.PrinCurve.Rd +++ b/man/AutoNumber.by.PrinCurve.Rd @@ -18,7 +18,7 @@ AutoNumber.by.PrinCurve( \item{dim}{Dimensions to use, Default: 1:2} -\item{plotit}{Plot results (& show it), Default: TRUE} +\item{plotit}{Plot results (& show it), Default: \code{TRUE}.} \item{swap}{Swap Lambda paramter (multiplied with this) , Default: -1} diff --git a/man/AutoNumber.by.UMAP.Rd b/man/AutoNumber.by.UMAP.Rd index 4c5f0dc..1abae48 100644 --- a/man/AutoNumber.by.UMAP.Rd +++ b/man/AutoNumber.by.UMAP.Rd @@ -23,12 +23,12 @@ Default: 'umap'.} \item{dim}{Dimension along which to order clusters (1 for the first dimension, typically horizontal); Default: 1.} -\item{swap}{If TRUE, reverses the ordering direction; Default: FALSE.} +\item{swap}{If TRUE, reverses the ordering direction; Default: \code{FALSE}.} \item{ident}{Clustering resolution identifier used to fetch cluster labels from \code{obj} metadata; Default: 'integrated_snn_res.0.5'.} -\item{plot}{If TRUE, plots the UMAP with new cluster names; Default: TRUE.} +\item{plot}{If TRUE, plots the UMAP with new cluster names; Default: \code{TRUE}.} } \description{ Automatically renumbers clusters based on their position along a specified dimension diff --git a/man/Calc.Cor.Seurat.Rd b/man/Calc.Cor.Seurat.Rd index e84c138..ea90481 100644 --- a/man/Calc.Cor.Seurat.Rd +++ b/man/Calc.Cor.Seurat.Rd @@ -19,15 +19,15 @@ Calc.Cor.Seurat( \item{slot.use}{The slot to use from the assay in the Seurat object. Default: 'data'} -\item{quantileX}{The quantile level for the calculation. Default: 0.95} +\item{quantileX}{The quantile level for the calculation. Default: \code{0.95}} -\item{max.cells}{Maximum number of cells to be used in the calculation. Default: 40000} +\item{max.cells}{Maximum number of cells to be used in the calculation. Default: \code{40000}} -\item{seed}{The random seed used for the calculation. Default: p$seed} +\item{seed}{The random seed used for the calculation. Default: \code{p$seed}} -\item{digits}{The number of decimal places to round the correlation and covariance values. Default: 2} +\item{digits}{The number of decimal places to round the correlation and covariance values. Default: \code{2}} -\item{obj}{The Seurat object to perform calculations on. Default: combined.obj} +\item{obj}{The Seurat object to perform calculations on. Default: \code{combined.obj}} } \description{ Calculate gene correlation on a Seurat object. diff --git a/man/Convert10Xfolders.Rd b/man/Convert10Xfolders.Rd index 640371f..a5f01f0 100644 --- a/man/Convert10Xfolders.Rd +++ b/man/Convert10Xfolders.Rd @@ -26,29 +26,29 @@ Convert10Xfolders( \arguments{ \item{InputDir}{A character string specifying the input directory.} -\item{regex}{A logical value. If TRUE, the folderPattern is treated as a regular expression. Default is FALSE.} +\item{regex}{A logical value. If TRUE, the folderPattern is treated as a regular expression. Default: \code{FALSE}.} -\item{folderPattern}{A character vector specifying the pattern of folder names to be searched. Default is 'filtered_feature'.} +\item{folderPattern}{A character vector specifying the pattern of folder names to be searched. Default: 'filtered_feature'.} \item{suffix}{A character string specifying the suffix of the files saved.} -\item{depth}{An integer value specifying the depth of scan (i.e., how many levels below the InputDir). Default is 2.} +\item{depth}{An integer value specifying the depth of scan (i.e., how many levels below the InputDir). Default: 2.} -\item{min.cells}{An integer value specifying the minimum number of cells. Default is 5.} +\item{min.cells}{An integer value specifying the minimum number of cells. Default: 5.} -\item{min.features}{An integer value specifying the minimum number of features. Default is 200.} +\item{min.features}{An integer value specifying the minimum number of features. Default: 200.} -\item{updateHGNC}{A logical value indicating whether to update the HGNC. Default is TRUE.} +\item{updateHGNC}{A logical value indicating whether to update the HGNC. Default: \code{TRUE}.} -\item{ShowStats}{A logical value indicating whether to show statistics. Default is TRUE.} +\item{ShowStats}{A logical value indicating whether to show statistics. Default: \code{TRUE}.} -\item{writeCBCtable}{A logical value indicating whether to write out a list of cell barcodes (CBC) as a tsv file. Default is TRUE.} +\item{writeCBCtable}{A logical value indicating whether to write out a list of cell barcodes (CBC) as a tsv file. Default: \code{TRUE}.} -\item{sort_alphanumeric}{sort files alphanumeric? Default: TRUE.} +\item{sort_alphanumeric}{sort files alphanumeric? Default: \code{TRUE}.} -\item{save_empty_droplets}{save empty droplets? Default: TRUE.} +\item{save_empty_droplets}{save empty droplets? Default: \code{TRUE}.} -\item{save}{Save .qs object? Default: TRUE.} +\item{save}{Save .qs object? Default: \code{TRUE}.} } \description{ This function takes a parent directory with a number of subfolders, each diff --git a/man/Convert10Xfolders.old.Rd b/man/Convert10Xfolders.old.Rd index b99e5a2..8c09a71 100644 --- a/man/Convert10Xfolders.old.Rd +++ b/man/Convert10Xfolders.old.Rd @@ -16,15 +16,15 @@ Convert10Xfolders.old( \arguments{ \item{InputDir}{A character string specifying the input directory.} -\item{folderPattern}{A character vector specifying the pattern of folder names to be searched. Default is 'filtered'.} +\item{folderPattern}{A character vector specifying the pattern of folder names to be searched. Default: 'filtered'.} -\item{min.cells}{An integer value specifying the minimum number of cells. Default is 10.} +\item{min.cells}{An integer value specifying the minimum number of cells. Default: 10.} -\item{min.features}{An integer value specifying the minimum number of features. Default is 200.} +\item{min.features}{An integer value specifying the minimum number of features. Default: 200.} -\item{updateHGNC}{A logical value indicating whether to update the HGNC. Default is TRUE.} +\item{updateHGNC}{A logical value indicating whether to update the HGNC. Default: \code{TRUE}.} -\item{ShowStats}{A logical value indicating whether to show statistics. Default is TRUE.} +\item{ShowStats}{A logical value indicating whether to show statistics. Default: \code{TRUE}.} } \description{ This function takes a parent directory with a number of subfolders, each containing the standard output of 10X Cell Ranger. It (1) loads the filtered data matrices, (2) converts them to Seurat objects, and (3) saves them as .RDS files. diff --git a/man/Convert10Xfolders_v1.Rd b/man/Convert10Xfolders_v1.Rd index ba5d6d3..764936c 100644 --- a/man/Convert10Xfolders_v1.Rd +++ b/man/Convert10Xfolders_v1.Rd @@ -25,25 +25,25 @@ Convert10Xfolders_v1( \arguments{ \item{InputDir}{A character string specifying the input directory.} -\item{regex}{A logical value. If TRUE, the folderPattern is treated as a regular expression. Default is FALSE.} +\item{regex}{A logical value. If TRUE, the folderPattern is treated as a regular expression. Default: \code{FALSE}.} -\item{folderPattern}{A character vector specifying the pattern of folder names to be searched. Default is 'filtered_feature'.} +\item{folderPattern}{A character vector specifying the pattern of folder names to be searched. Default: 'filtered_feature'.} -\item{depth}{An integer value specifying the depth of scan (i.e., how many levels below the InputDir). Default is 2.} +\item{depth}{An integer value specifying the depth of scan (i.e., how many levels below the InputDir). Default: 2.} -\item{min.cells}{An integer value specifying the minimum number of cells. Default is 5.} +\item{min.cells}{An integer value specifying the minimum number of cells. Default: 5.} -\item{min.features}{An integer value specifying the minimum number of features. Default is 200.} +\item{min.features}{An integer value specifying the minimum number of features. Default: 200.} -\item{updateHGNC}{A logical value indicating whether to update the HGNC. Default is TRUE.} +\item{updateHGNC}{A logical value indicating whether to update the HGNC. Default: \code{TRUE}.} -\item{ShowStats}{A logical value indicating whether to show statistics. Default is TRUE.} +\item{ShowStats}{A logical value indicating whether to show statistics. Default: \code{TRUE}.} -\item{writeCBCtable}{A logical value indicating whether to write out a list of cell barcodes (CBC) as a tsv file. Default is TRUE.} +\item{writeCBCtable}{A logical value indicating whether to write out a list of cell barcodes (CBC) as a tsv file. Default: \code{TRUE}.} -\item{sample.barcoding}{A logical value indicating whether Cell Ranger was run with sample barcoding. Default is FALSE.} +\item{sample.barcoding}{A logical value indicating whether Cell Ranger was run with sample barcoding. Default: \code{FALSE}.} -\item{sort_alphanumeric}{sort files alphanumeric? Default: TRUE.} +\item{sort_alphanumeric}{sort files alphanumeric? Default: \code{TRUE}.} } \description{ This function takes a parent directory with a number of subfolders, each diff --git a/man/ConvertDropSeqfolders.Rd b/man/ConvertDropSeqfolders.Rd index 422726f..31231dd 100644 --- a/man/ConvertDropSeqfolders.Rd +++ b/man/ConvertDropSeqfolders.Rd @@ -21,25 +21,25 @@ ConvertDropSeqfolders( \arguments{ \item{InputDir}{A character string specifying the input directory.} -\item{folderPattern}{A character string specifying the pattern of folder names to be searched. Default is 'SRR*'.} +\item{folderPattern}{A character string specifying the pattern of folder names to be searched. Default: 'SRR*'.} -\item{filePattern}{A character string specifying the pattern of file names to be searched. Default is 'expression.tsv.gz'.} +\item{filePattern}{A character string specifying the pattern of file names to be searched. Default: 'expression.tsv.gz'.} -\item{useVroom}{A logical value indicating whether to use vroom. Default is TRUE.} +\item{useVroom}{A logical value indicating whether to use vroom. Default: \code{TRUE}.} -\item{col_types.vroom}{A list defining column types for vroom. Default is list("GENE" = "c", .default = "d").} +\item{col_types.vroom}{A list defining column types for vroom. Default: list("GENE" = "c", .default = "d").} -\item{min.cells}{An integer value specifying the minimum number of cells. Default is 10.} +\item{min.cells}{An integer value specifying the minimum number of cells. Default: 10.} -\item{min.features}{An integer value specifying the minimum number of features. Default is 200.} +\item{min.features}{An integer value specifying the minimum number of features. Default: 200.} -\item{updateHGNC}{A logical value indicating whether to update the HGNC. Default is TRUE.} +\item{updateHGNC}{A logical value indicating whether to update the HGNC. Default: \code{TRUE}.} -\item{ShowStats}{A logical value indicating whether to show statistics. Default is TRUE.} +\item{ShowStats}{A logical value indicating whether to show statistics. Default: \code{TRUE}.} -\item{minDimension}{An integer value specifying the minimum dimension. Default is 10.} +\item{minDimension}{An integer value specifying the minimum dimension. Default: 10.} -\item{overwrite}{A logical value indicating whether to overwrite files. Default is FALSE.} +\item{overwrite}{A logical value indicating whether to overwrite files. Default: \code{FALSE}.} } \description{ This function takes a parent directory with a number of subfolders, each diff --git a/man/DimPlot.ClusterNames.Rd b/man/DimPlot.ClusterNames.Rd index 58fd3bf..756e8ed 100644 --- a/man/DimPlot.ClusterNames.Rd +++ b/man/DimPlot.ClusterNames.Rd @@ -6,7 +6,7 @@ \usage{ DimPlot.ClusterNames( obj = combined.obj, - ident = GetNamedClusteringRuns(obj = obj, v = F)[1], + ident = GetNamedClusteringRuns(obj = obj, v = FALSE)[1], reduction = "umap", title = ident, ... diff --git a/man/DiscretePaletteSafe.Rd b/man/DiscretePaletteSafe.Rd index 3e47788..fe5c5ec 100644 --- a/man/DiscretePaletteSafe.Rd +++ b/man/DiscretePaletteSafe.Rd @@ -17,7 +17,7 @@ DiscretePaletteSafe( \item{palette.used}{Palette name to use for generating colors. Options include "alphabet", "alphabet2", "glasbey", "polychrome", "stepped". Default: "alphabet2".} -\item{show.colors}{If TRUE, displays the generated color palette. Default: FALSE.} +\item{show.colors}{If TRUE, displays the generated color palette. Default: \code{FALSE}.} \item{seed}{Seed value for reproducibility, especially when random color generation is involved. Default: 1989.} diff --git a/man/FindCorrelatedGenes.Rd b/man/FindCorrelatedGenes.Rd index 5c720b5..befa261 100644 --- a/man/FindCorrelatedGenes.Rd +++ b/man/FindCorrelatedGenes.Rd @@ -18,13 +18,13 @@ FindCorrelatedGenes( \arguments{ \item{gene}{Gene of interest. Default: 'TOP2A'} -\item{obj}{Seurat object to find the correlated genes from. Default: combined.obj} +\item{obj}{Seurat object to find the correlated genes from. Default: \code{combined.obj}} \item{assay}{Assay to be used from the Seurat object. Default: 'RNA'} \item{slot}{Slot to be used from the specified assay in the Seurat object. Default: 'data'} -\item{HEonly}{Logical, if TRUE, filters matrix to high-expressing genes only. Default: FALSE} +\item{HEonly}{Logical, if TRUE, filters matrix to high-expressing genes only. Default: \code{FALSE}.} \item{minExpr}{Minimum expression level for a gene to be considered. Default: 1} diff --git a/man/FlipReductionCoordinates.Rd b/man/FlipReductionCoordinates.Rd index 0d186e5..cd8048f 100644 --- a/man/FlipReductionCoordinates.Rd +++ b/man/FlipReductionCoordinates.Rd @@ -21,7 +21,7 @@ FlipReductionCoordinates( \item{flip}{Axis (or axes) to flip; can be 'x', 'y', or 'xy' to flip both; Default: "x".} -\item{FlipReductionBackupToo}{Boolean indicating whether to also flip coordinates in the backup slot; Default: TRUE.} +\item{FlipReductionBackupToo}{Boolean indicating whether to also flip coordinates in the backup slot; Default: \code{TRUE}.} } \description{ Flips dimensionality reduction coordinates (such as UMAP or tSNE) vertically or diff --git a/man/GetClusteringRuns.Rd b/man/GetClusteringRuns.Rd index 014a084..c94b022 100644 --- a/man/GetClusteringRuns.Rd +++ b/man/GetClusteringRuns.Rd @@ -12,13 +12,13 @@ GetClusteringRuns( ) } \arguments{ -\item{obj}{Seurat object, Default: combined.obj} +\item{obj}{Seurat object, Default: \code{combined.obj}} -\item{res}{Clustering resoluton to use, Default: FALSE} +\item{res}{Clustering resoluton to use, Default: \code{FALSE}.} \item{pat}{Pattern to match, Default: \verb{*snn_res.*[0-9]$}} -\item{v}{} +\item{v}{verbose, Default: \code{TRUE}.} } \value{ Prints and returns the sorted unique cluster names as a character vector. diff --git a/man/GetNamedClusteringRuns.Rd b/man/GetNamedClusteringRuns.Rd index cf7467f..d96fb9d 100644 --- a/man/GetNamedClusteringRuns.Rd +++ b/man/GetNamedClusteringRuns.Rd @@ -14,18 +14,18 @@ GetNamedClusteringRuns( ) } \arguments{ -\item{obj}{Seurat object, Default: combined.obj} +\item{obj}{Seurat object, Default: \code{combined.obj}} \item{res}{Clustering resoluton to use, Default: c(FALSE, 0.5)\link{1}} -\item{topgene}{Match clustering named after top expressed gene (see vertesy/Seurat.pipeline/~Diff gene expr.), Default: FALSE} +\item{topgene}{Match clustering named after top expressed gene (see vertesy/Seurat.pipeline/~Diff gene expr.), Default: \code{FALSE}.} \item{pat}{Pattern to match, Default: '^cl.names.Known.*\link{0,1}\.\link{0-9}$'} \item{find.alternatives}{If TRUE, tries to find alternative clustering runs with -the same resolution, Default: TRUE} +the same resolution, Default: \code{TRUE}.} -\item{v}{Verbose output, Default: TRUE} +\item{v}{Verbose output, Default: \code{TRUE}.} } \description{ The \code{GetNamedClusteringRuns} function retrieves metadata column names associated with diff --git a/man/GetNumberOfClusters.Rd b/man/GetNumberOfClusters.Rd index 8502566..666d53a 100644 --- a/man/GetNumberOfClusters.Rd +++ b/man/GetNumberOfClusters.Rd @@ -7,7 +7,7 @@ GetNumberOfClusters(obj = combined.obj) } \arguments{ -\item{obj}{Seurat object, Default: combined.obj} +\item{obj}{Seurat object, Default: \code{combined.obj}} } \description{ Get Number Of Clusters # diff --git a/man/GetOrderedClusteringRuns.Rd b/man/GetOrderedClusteringRuns.Rd index be38d53..1ae4e2d 100644 --- a/man/GetOrderedClusteringRuns.Rd +++ b/man/GetOrderedClusteringRuns.Rd @@ -11,9 +11,9 @@ GetOrderedClusteringRuns( ) } \arguments{ -\item{obj}{Seurat object, Default: combined.obj.} +\item{obj}{Seurat object, Default: \code{combined.obj}.} -\item{res}{Clustering resoluton to use, Default: FALSE} +\item{res}{Clustering resoluton to use, Default: \code{FALSE}.} \item{pat}{Pattern to match, Default: '\emph{snn_res.}\link{0,1}\.\link{0-9}\.ordered$'} } diff --git a/man/IntersectGeneLsWithObject.Rd b/man/IntersectGeneLsWithObject.Rd index c6ffa2f..0b12969 100644 --- a/man/IntersectGeneLsWithObject.Rd +++ b/man/IntersectGeneLsWithObject.Rd @@ -22,7 +22,7 @@ IntersectGeneLsWithObject( \item{n_genes_shown}{Number of missing genes to be printed. Default: 10.} -\item{strict}{All genes to be present in the Seurat object? Default: TRUE.} +\item{strict}{All genes to be present in the Seurat object? Default: \code{TRUE}.} \item{verbose}{verbose} } diff --git a/man/LoadAllSeurats.Rd b/man/LoadAllSeurats.Rd index 0c6135a..b6c355b 100644 --- a/man/LoadAllSeurats.Rd +++ b/man/LoadAllSeurats.Rd @@ -18,17 +18,17 @@ LoadAllSeurats( \item{InputDir}{A character string specifying the input directory.} \item{file.pattern}{A character string specifying the pattern of file names to be searched. -Default is '^filtered.+Rds$'.} +Default: '^filtered.+Rds$'.} \item{string.remove1}{A character string or FALSE. If a string is provided, it is removed from -file names. Default is "filtered_feature_bc_matrix.".} +file names. Default: "filtered_feature_bc_matrix.".} \item{string.replace1}{A character string of the new text instead of "string.remove1".} \item{string.remove2}{A character string or FALSE. If a string is provided, it is removed from -file names. Default is ".min.cells.10.min.features.200.Rds".} +file names. Default: ".min.cells.10.min.features.200.Rds".} -\item{sort_alphanumeric}{sort files alphanumeric? Default: TRUE.} +\item{sort_alphanumeric}{sort files alphanumeric? Default: \code{TRUE}.} } \description{ This function loads all Seurat objects found in a directory. It also works with diff --git a/man/PctCellsAboveX.Rd b/man/PctCellsAboveX.Rd index 4725213..99989c2 100644 --- a/man/PctCellsAboveX.Rd +++ b/man/PctCellsAboveX.Rd @@ -7,7 +7,7 @@ PctCellsAboveX( obj = combined.obj, feature = "TOP2A", - ident = GetNamedClusteringRuns(obj = obj, v = F)[1], + ident = GetNamedClusteringRuns(obj = obj, v = FALSE)[1], threshold = 2, suffix = ppp(substitute(obj), ncol(obj), "thr", threshold), box = FALSE, @@ -31,17 +31,17 @@ PctCellsAboveX( \item{threshold}{The threshold value to evaluate the feature against. Default: 2.} -\item{box}{Logical value indicating whether to plot the boxplot. Default: TRUE.} +\item{box}{Logical value indicating whether to plot the boxplot. Default: \code{TRUE}.} \item{subset_ident}{The identity class to subset the data by. Default: NULL.} \item{subset_values}{The values of the identity class to keep in the subset. Default: NULL.} -\item{omit.na}{Logical value indicating whether to omit NA values. Default: TRUE.} +\item{omit.na}{Logical value indicating whether to omit NA values. Default: \code{TRUE}.} \item{assay}{The assay to use for feature extraction. Default: 'RNA'.} -\item{plot}{Logical value indicating whether to plot the results. Default: TRUE.} +\item{plot}{Logical value indicating whether to plot the results. Default: \code{TRUE}.} \item{ylab}{The label for the y-axis of the plot. Default: "\% cells above threshold".} @@ -59,6 +59,6 @@ feature in a Seurat object. It can subset the data based on a specified identity } \examples{ \dontrun{ -PctCellsAboveX(obj = seurat_object, feature = 'GeneA', ident = 'CellType', threshold = 1.5) +PctCellsAboveX(obj = seurat_object, feature = "GeneA", ident = "CellType", threshold = 1.5) } } diff --git a/man/PercentInTranscriptome.Rd b/man/PercentInTranscriptome.Rd index 794d4d0..1689aec 100644 --- a/man/PercentInTranscriptome.Rd +++ b/man/PercentInTranscriptome.Rd @@ -16,10 +16,10 @@ PercentInTranscriptome( \item{obj}{A Seurat object containing gene expression data.} \item{n.genes.barplot}{The number of top genes to be displayed in the final barplot, showing -their expression as a percentage of the total UMIs. Default is 25.} +their expression as a percentage of the total UMIs. Default: 25.} \item{width.barplot}{The width of the barplot that visualizes the highest expressed genes. -Default is a quarter of \code{n.genes.barplot}.} +Default: a quarter of \code{n.genes.barplot}.} } \value{ The same Seurat object passed as input, but with an additional list in the \verb{@misc} slot diff --git a/man/Plot3D.ListOfCategories.Rd b/man/Plot3D.ListOfCategories.Rd index 01e5a3d..700dd39 100644 --- a/man/Plot3D.ListOfCategories.Rd +++ b/man/Plot3D.ListOfCategories.Rd @@ -14,13 +14,13 @@ Plot3D.ListOfCategories( ) } \arguments{ -\item{obj}{A Seurat object for which the plot is to be created. Default is 'combined.obj'.} +\item{obj}{A Seurat object for which the plot is to be created. Default: 'combined.obj'.} -\item{annotate.by}{Character vector specifying the metadata column to be used for annotating the plot. Default is 'integrated_snn_res.0.7'.} +\item{annotate.by}{Character vector specifying the metadata column to be used for annotating the plot. Default: 'integrated_snn_res.0.7'.} -\item{cex}{Numeric value specifying the point size on the plot. Default is 1.25.} +\item{cex}{Numeric value specifying the point size on the plot. Default: 1.25.} -\item{default.assay}{Character vector specifying the assay to be used. Default is 'RNA' (second element in the vector c("integrated", "RNA")).} +\item{default.assay}{Character vector specifying the assay to be used. Default: 'RNA' (second element in the vector c("integrated", "RNA")).} \item{ListOfCategories}{Character vector specifying the categories to be included in the plot. Default categories are "v.project", "experiment", "Phase", "integrated_snn_res.0.7".} diff --git a/man/PlotFilters.Rd b/man/PlotFilters.Rd index 2d68c11..89d19e5 100644 --- a/man/PlotFilters.Rd +++ b/man/PlotFilters.Rd @@ -47,7 +47,7 @@ PlotFilters( \item{above.nFeature_RNA}{Upper bound of RNA features. Default: \code{p$thr.hp.nFeature_RNA}.} -\item{subdir}{Subdirectory within \code{parentdir} where plots will be stored. Default is generated using a call to \code{kpp()}.} +\item{subdir}{Subdirectory within \code{parentdir} where plots will be stored. Default: generated using a call to \code{kpp()}.} \item{transparency}{Point transparency on scatter plots. Default: 0.25.} @@ -68,8 +68,8 @@ parameters, present in the global environment, with elements \code{thr.lp.mito}, \dontrun{ if (interactive()) { # !! Default arguments assume that `p` is a list of parameters, present in the global - environment, with elements `thr.lp.mito`, `thr.hp.mito`, `thr.lp.ribo`, `thr.hp.ribo`, - `thr.lp.nFeature_RNA`, and `thr.hp.nFeature_RNA`. + # environment, with elements `thr.lp.mito`, `thr.hp.mito`, `thr.lp.ribo`, `thr.hp.ribo`, + # `thr.lp.nFeature_RNA`, and `thr.hp.nFeature_RNA`. PlotFilters(ls.Seurat) } } diff --git a/man/RelabelSmallCategories.Rd b/man/RelabelSmallCategories.Rd index 2f2ddf1..df5a2df 100644 --- a/man/RelabelSmallCategories.Rd +++ b/man/RelabelSmallCategories.Rd @@ -10,7 +10,7 @@ RelabelSmallCategories( backup_col_name = ppp(col_in, "orig"), min_count = 100, small_label = "Other", - v = T + v = TRUE ) } \arguments{ diff --git a/man/SelectHighlyExpressedGenesq99.Rd b/man/SelectHighlyExpressedGenesq99.Rd index 2f44c78..60a090c 100644 --- a/man/SelectHighlyExpressedGenesq99.Rd +++ b/man/SelectHighlyExpressedGenesq99.Rd @@ -21,7 +21,7 @@ SelectHighlyExpressedGenesq99( expressed. Default: 0.} \item{sort}{A logical flag indicating whether to sort the filtered genes by their expression -levels in decreasing order. Default: FALSE.} +levels in decreasing order. Default: \code{FALSE}.} } \value{ A vector of gene names that are found both in the input 'genes' vector and the Seurat diff --git a/man/SeuratColorVector.Rd b/man/SeuratColorVector.Rd index dbba809..de3fbbc 100644 --- a/man/SeuratColorVector.Rd +++ b/man/SeuratColorVector.Rd @@ -19,11 +19,11 @@ If NULL, the active identity in \code{obj} is used. Default: NULL.} Default: \code{combined.obj}.} \item{plot.colors}{If TRUE, visually displays the color scheme. -Default: FALSE.} +Default: \code{FALSE}.} \item{simple}{If TRUE, returns only the unique set of colors used. If FALSE, returns a named vector mapping cluster identities to colors. -Default: FALSE.} +Default: \code{FALSE}.} } \description{ Extracts and optionally displays the color scheme assigned to cluster identities diff --git a/man/addGeneClassFractions.Rd b/man/addGeneClassFractions.Rd index 244caf3..730b50d 100644 --- a/man/addGeneClassFractions.Rd +++ b/man/addGeneClassFractions.Rd @@ -18,7 +18,7 @@ addGeneClassFractions( \item{gene_fractions}{A named list containing gene symbol patterns for each meta column name. Default: List of predefined gene fractions.} -\item{add_hga}{A logical value indicating whether to add percent.HGA meta data. Default: TRUE.} +\item{add_hga}{A logical value indicating whether to add percent.HGA meta data. Default: \code{TRUE}.} } \value{ An updated Seurat object. diff --git a/man/addMetaDataSafe.Rd b/man/addMetaDataSafe.Rd index 6857887..89b96ee 100644 --- a/man/addMetaDataSafe.Rd +++ b/man/addMetaDataSafe.Rd @@ -9,7 +9,7 @@ addMetaDataSafe( metadata, col.name, overwrite = FALSE, - verbose = F, + verbose = FALSE, strict = TRUE ) } diff --git a/man/addMetaFraction.Rd b/man/addMetaFraction.Rd index 681d1b7..ce76c68 100644 --- a/man/addMetaFraction.Rd +++ b/man/addMetaFraction.Rd @@ -25,9 +25,9 @@ addMetaFraction( \item{layer}{Name of the layer to be used. Default: 'data'} -\item{gene.set}{A set of gene symbols. If specified, it will be used instead of gene.symbol.pattern. Default: FALSE} +\item{gene.set}{A set of gene symbols. If specified, it will be used instead of gene.symbol.pattern. Default: \code{FALSE}.} -\item{verbose}{Logical indicating whether to display detailed messages (TRUE) or not (FALSE). Default: TRUE} +\item{verbose}{Logical indicating whether to display detailed messages (TRUE) or not (FALSE). Default: \code{TRUE}.} } \description{ Add a new metadata column to a Seurat object, representing the fraction of a gene set in the transcriptome (expressed as a percentage). diff --git a/man/calc.cluster.averages.Rd b/man/calc.cluster.averages.Rd index 41ccea1..aa5f656 100644 --- a/man/calc.cluster.averages.Rd +++ b/man/calc.cluster.averages.Rd @@ -42,21 +42,21 @@ calc.cluster.averages( \arguments{ \item{col_name}{The name of the column for which the average is calculated. Default: 'Score.GO.0006096'.} -\item{plot.UMAP.too}{Whether to plot a UMAP as well. Default: TRUE.} +\item{plot.UMAP.too}{Whether to plot a UMAP as well. Default: \code{TRUE}.} -\item{return.plot}{Whether to return the plot. Default: FALSE.} +\item{return.plot}{Whether to return the plot. Default: \code{FALSE}.} -\item{obj}{The main Seurat object used for calculations. Default: combined.obj.} +\item{obj}{The main Seurat object used for calculations. Default: \code{combined.obj}.} \item{split_by}{Cluster to split by. Default: First entry of GetNamedClusteringRuns().} -\item{scale.zscore}{Whether to scale z-scores. Default: FALSE.} +\item{scale.zscore}{Whether to scale z-scores. Default: \code{FALSE}.} -\item{simplify}{Whether to simplify the result. Default: TRUE.} +\item{simplify}{Whether to simplify the result. Default: \code{TRUE}.} -\item{plotit}{Whether to plot the results. Default: TRUE.} +\item{plotit}{Whether to plot the results. Default: \code{TRUE}.} -\item{histogram}{Whether to produce a histogram. Default: FALSE.} +\item{histogram}{Whether to produce a histogram. Default: \code{FALSE}.} \item{nbins}{The number of bins for the histogram. Default: 50.} @@ -66,9 +66,9 @@ calc.cluster.averages( \item{quantile.thr}{The threshold for quantiles. Default: 0.9.} -\item{absolute.thr}{Absolute threshold used in computations. Default: FALSE.} +\item{absolute.thr}{Absolute threshold used in computations. Default: \code{FALSE}.} -\item{filter}{The filter mode: 'above', 'below', or FALSE. Default: FALSE.} +\item{filter}{The filter mode: 'above', 'below', or FALSE. Default: \code{FALSE}.} \item{ylab.text}{Text for the y-axis label. Default: "Cluster" followed by the statistical method and "score".} @@ -84,7 +84,7 @@ calc.cluster.averages( \item{xlb}{The label for the x-axis. Default depends on the 'absolute.thr' parameter.} -\item{fname}{The filename for the plot. Default is based on column name and split_by value.} +\item{fname}{The filename for the plot. Default: based on column name and split_by value.} } \description{ Calculates the average of a metadata column (numeric) per cluster. diff --git a/man/calc.q99.Expression.and.set.all.genes.Rd b/man/calc.q99.Expression.and.set.all.genes.Rd index 09c5e71..e3663b7 100644 --- a/man/calc.q99.Expression.and.set.all.genes.Rd +++ b/man/calc.q99.Expression.and.set.all.genes.Rd @@ -18,7 +18,7 @@ calc.q99.Expression.and.set.all.genes( ) } \arguments{ -\item{obj}{Seurat object, Default: combined.obj} +\item{obj}{Seurat object, Default: \code{combined.obj}} \item{quantileX}{Quantile level, Default: 0.9} @@ -28,13 +28,13 @@ calc.q99.Expression.and.set.all.genes( \item{assay}{RNA or integrated assay, Default: c("RNA", "integrated")\link{1}} -\item{set.misc}{Create the "all.genes" variable in @misc? Default: TRUE} +\item{set.misc}{Create the "all.genes" variable in @misc? Default: \code{TRUE}.} -\item{assign_to_global_env}{Create the "all.genes" variable in the global env?, Default: TRUE} +\item{assign_to_global_env}{Create the "all.genes" variable in the global env?, Default: \code{TRUE}.} -\item{plot}{Plot the expression distribution? Default: TRUE} +\item{plot}{Plot the expression distribution? Default: \code{TRUE}.} -\item{show}{Show the distribution plot? Default: TRUE} +\item{show}{Show the distribution plot? Default: \code{TRUE}.} } \description{ Calculate the gene expression of the e.g.: 99th quantile (expression in the top 1\% cells). diff --git a/man/calculatePercentageMatch.Rd b/man/calculatePercentageMatch.Rd index e198f39..d7e4acc 100644 --- a/man/calculatePercentageMatch.Rd +++ b/man/calculatePercentageMatch.Rd @@ -25,7 +25,7 @@ Default: c("AAV.detected.min2", "AAV.detected").} \item{match.values}{A named vector where names correspond to \code{meta.features} and values are the strings to match against. Default: c("AAV.detected.min2" = "AAV", "AAV.detected" = "AAV").} -\item{verbose}{A logical value indicating whether to print detailed output. Default: TRUE.} +\item{verbose}{A logical value indicating whether to print detailed output. Default: \code{TRUE}.} \item{max.categ}{The maximum number of categories allowed before stopping. Default: 30.} } diff --git a/man/clUMAP.Rd b/man/clUMAP.Rd index 6b75241..e3f1b34 100644 --- a/man/clUMAP.Rd +++ b/man/clUMAP.Rd @@ -76,9 +76,9 @@ clUMAP( \item{cells.highlight}{Specific cells to be highlighted; optional; Default: NULL.} -\item{label}{Show cluster labels; Default: TRUE.} +\item{label}{Show cluster labels; Default: \code{TRUE}.} -\item{repel}{Repel labels to avoid overlap; Default: TRUE.} +\item{repel}{Repel labels to avoid overlap; Default: \code{TRUE}.} \item{legend}{Show legend; Default: opposite of \code{label}.} @@ -86,15 +86,15 @@ clUMAP( \item{MaxCategThrHP}{Maximum number of categories before simplification; Default: 200.} -\item{axes}{Show axes; Default: FALSE.} +\item{axes}{Show axes; Default: \code{FALSE}.} -\item{aspect.ratio}{Fixed aspect ratio for the plot; Default: TRUE.} +\item{aspect.ratio}{Fixed aspect ratio for the plot; Default: \code{TRUE}.} -\item{save.plot}{Save plot to file; Default: TRUE.} +\item{save.plot}{Save plot to file; Default: \code{TRUE}.} -\item{PNG}{Save as PNG (TRUE) or PDF (FALSE); Default: TRUE.} +\item{PNG}{Save as PNG (TRUE) or PDF (FALSE); Default: \code{TRUE}.} -\item{check_for_2D}{Ensure UMAP is 2D; Default: TRUE.} +\item{check_for_2D}{Ensure UMAP is 2D; Default: \code{TRUE}.} \item{...}{Additional parameters for \code{DimPlot}.} } diff --git a/man/compareVarFeaturesAndRanks.Rd b/man/compareVarFeaturesAndRanks.Rd index b9e5721..392e988 100644 --- a/man/compareVarFeaturesAndRanks.Rd +++ b/man/compareVarFeaturesAndRanks.Rd @@ -20,7 +20,7 @@ compareVarFeaturesAndRanks( \item{obj2}{The second Seurat object for comparison. Default: NULL.} \item{cor.plot}{An optional boolean indicating whether to generate a scatterplot of the ranks -of common genes. Default: FALSE.} +of common genes. Default: \code{FALSE}.} \item{save.plot}{save.plot} diff --git a/man/dot-parseKeyParams.Rd b/man/dot-parseKeyParams.Rd index d4fae46..e03e8d8 100644 --- a/man/dot-parseKeyParams.Rd +++ b/man/dot-parseKeyParams.Rd @@ -11,7 +11,7 @@ return.as.name = FALSE, assay = Seurat::DefaultAssay(obj), suffix = NULL, - v = T + v = TRUE ) } \arguments{ @@ -22,7 +22,7 @@ If NULL, the function will attempt to extract the variables from the \code{objec \item{nrVarFeatures}{You can provide this number manually. Default: NULL.} -\item{return.as.name}{If TRUE, returns the name of the object. Default: FALSE.} +\item{return.as.name}{If TRUE, returns the name of the object. Default: \code{FALSE}.} \item{assay}{The assay to extract scaled features from. Default: "RNA".} diff --git a/man/dot-saveRDS.compress.in.BG.Rd b/man/dot-saveRDS.compress.in.BG.Rd index 16410ff..9d9940d 100644 --- a/man/dot-saveRDS.compress.in.BG.Rd +++ b/man/dot-saveRDS.compress.in.BG.Rd @@ -15,11 +15,11 @@ \arguments{ \item{obj}{Seurat object.} -\item{compr}{Compress at all? Default: TRUE} +\item{compr}{Compress at all? Default: \code{TRUE}.} \item{fname}{File name} -\item{compress_internally}{Compress by R? Default: FALSE (still compressed in background via CLI).} +\item{compress_internally}{Compress by R? Default: \code{FALSE}. (still compressed in background via CLI).} \item{...}{Additional parameters passed to saveRDS() function.} } diff --git a/man/downsampleListSeuObjsNCells.Rd b/man/downsampleListSeuObjsNCells.Rd index cd07594..3f62c18 100644 --- a/man/downsampleListSeuObjsNCells.Rd +++ b/man/downsampleListSeuObjsNCells.Rd @@ -16,7 +16,7 @@ downsampleListSeuObjsNCells( \item{NrCells}{Target number of cells to downsample each Seurat object to.} \item{save_object}{Logical indicating whether to save the downsampled Seurat objects using \code{isaveRDS} -or to return them; Default: FALSE.} +or to return them; Default: \code{FALSE}.} } \description{ Downsampling each Seurat object in a list to a specified number of cells. This function is diff --git a/man/downsampleListSeuObjsPercent.Rd b/man/downsampleListSeuObjsPercent.Rd index fecddfc..8b88281 100644 --- a/man/downsampleListSeuObjsPercent.Rd +++ b/man/downsampleListSeuObjsPercent.Rd @@ -17,7 +17,7 @@ downsampleListSeuObjsPercent( \item{fraction}{Fraction of cells to retain in each Seurat object; Default: 0.1.} \item{save_object}{Logical indicating whether to save the downsampled Seurat objects using -\code{isaveRDS} or return them; Default: FALSE.} +\code{isaveRDS} or return them; Default: \code{FALSE}.} } \description{ Downsampling a list of Seurat objects to a specified fraction of their original size. diff --git a/man/downsampleSeuObj.Rd b/man/downsampleSeuObj.Rd index a9ef02f..dcbdb72 100644 --- a/man/downsampleSeuObj.Rd +++ b/man/downsampleSeuObj.Rd @@ -17,7 +17,7 @@ downsampleSeuObj( \item{fractionCells}{The fraction of the object's data to keep. Default: 0.25.} \item{nCells}{If set to a number greater than 1, indicates the absolute number of cells to keep. -If FALSE, the function uses 'fractionCells' to determine the number of cells. Default: FALSE.} +If FALSE, the function uses 'fractionCells' to determine the number of cells. Default: \code{FALSE}.} \item{seed}{A seed for random number generation to ensure reproducible results. Default: 1989.} } diff --git a/man/filterNcGenes.Rd b/man/filterNcGenes.Rd index e3cb885..4d9867a 100644 --- a/man/filterNcGenes.Rd +++ b/man/filterNcGenes.Rd @@ -21,7 +21,7 @@ Default: c("^AC.", "^AL.", "^c\link{1-9}orf", "\\.AS\link{1-9}$").} \item{v}{"verbose" Whether to print the number of genes before and after filtering.} -\item{unique}{Whether to return unique gene symbols. Default: TRUE.} +\item{unique}{Whether to return unique gene symbols. Default: \code{TRUE}.} \item{...}{Additional arguments to pass to \code{\link[stringr]{str_detect}}.} } diff --git a/man/getCellIDs.from.meta.Rd b/man/getCellIDs.from.meta.Rd index 27a6b21..fd2a191 100644 --- a/man/getCellIDs.from.meta.Rd +++ b/man/getCellIDs.from.meta.Rd @@ -14,11 +14,11 @@ getCellIDs.from.meta( \arguments{ \item{ident}{A string specifying the name of the metadata column from which to retrieve cell IDs. Default: 'res.0.6'.} -\item{ident_values}{A vector of values to match in the metadata column. Default: NA.} +\item{ident_values}{A vector of values to match in the metadata column. Default: \code{NA}.} \item{obj}{The Seurat object from which to retrieve the cell IDs. Default: combined.obj.} -\item{inverse}{A boolean value indicating whether to inverse the match, i.e., retrieve cell IDs that do not match the provided list of ident_values. Default: FALSE.} +\item{inverse}{A boolean value indicating whether to inverse the match, i.e., retrieve cell IDs that do not match the provided list of ident_values. Default: \code{FALSE}.} } \value{ A vector of cell IDs that match (or don't match, if \code{inverse = TRUE}) the provided list of values. diff --git a/man/getClusterColors.Rd b/man/getClusterColors.Rd index b5aa6c8..81c4b32 100644 --- a/man/getClusterColors.Rd +++ b/man/getClusterColors.Rd @@ -16,7 +16,7 @@ getClusterColors( \item{obj}{Seurat object containing clustering information.} \item{use_new_palettes}{Logical indicating whether to use custom palettes defined in -\code{DiscretePalette} function. Default: TRUE.} +\code{DiscretePalette} function. Default: \code{TRUE}.} \item{palette}{Name of the color palette to use if \code{use_new_palettes} is TRUE. Options: "alphabet", "alphabet2", "glasbey", "polychrome", "stepped". Default: "glasbey".} @@ -24,7 +24,7 @@ Options: "alphabet", "alphabet2", "glasbey", "polychrome", "stepped". Default: " \item{ident}{Clustering identity to use for coloring. Retrieved from the first entry of \code{GetClusteringRuns()} by default.} -\item{show}{If TRUE, displays a plot showing the color mapping for each cluster. Default: TRUE.} +\item{show}{If TRUE, displays a plot showing the color mapping for each cluster. Default: \code{TRUE}.} } \description{ Regenerate and optionally displays the color scheme associated with the clusters diff --git a/man/getDiscretePalette.Rd b/man/getDiscretePalette.Rd index 7c159da..0f4b32d 100644 --- a/man/getDiscretePalette.Rd +++ b/man/getDiscretePalette.Rd @@ -14,7 +14,7 @@ getDiscretePalette() \item{palette.used}{The name of the palette to use, Default: c("alphabet", "alphabet2", "glasbey", "polychrome", "stepped")\link{1}} -\item{show.colors}{Whether to display the colors in the palette, Default: FALSE} +\item{show.colors}{Whether to display the colors in the palette, Default: \code{FALSE}.} } \description{ Safe wrapper around Seurat's DiscretePalette(), which returns NA's if too many @@ -26,4 +26,5 @@ if (interactive()) { getDiscretePalette() } } + } diff --git a/man/getDiscretePaletteObj.Rd b/man/getDiscretePaletteObj.Rd index 2de2595..4d34d71 100644 --- a/man/getDiscretePaletteObj.Rd +++ b/man/getDiscretePaletteObj.Rd @@ -20,7 +20,7 @@ getDiscretePaletteObj( \item{palette.used}{The palette name to use for color generation. Options include "alphabet", "alphabet2", "glasbey", "polychrome", and "stepped". Default: "alphabet2".} -\item{show.colors}{If TRUE, displays the generated colors. Default: FALSE.} +\item{show.colors}{If TRUE, displays the generated colors. Default: \code{FALSE}.} \item{seed}{Seed for random color generation, ensuring reproducibility. Default: 1989.} } diff --git a/man/getMetadataColumn.Rd b/man/getMetadataColumn.Rd index b4647be..b348c60 100644 --- a/man/getMetadataColumn.Rd +++ b/man/getMetadataColumn.Rd @@ -11,7 +11,7 @@ getMetadataColumn(col = "batch", obj = combined.obj, as_numeric = FALSE) \item{obj}{A Seurat object from which the metadata column will be retrieved. Default: combined.obj.} -\item{as_numeric}{A logical flag indicating whether the returned values should be converted to numeric format. Default: FALSE (FALSE).} +\item{as_numeric}{A logical flag indicating whether the returned values should be converted to numeric format. Default: \code{FALSE}. (FALSE).} } \value{ A named vector containing the values from the specified metadata column. If 'as_numeric' is TRUE, the values are converted to numeric format. diff --git a/man/isave.RDS.Rd b/man/isave.RDS.Rd index b279187..3b787f2 100644 --- a/man/isave.RDS.Rd +++ b/man/isave.RDS.Rd @@ -22,29 +22,29 @@ isave.RDS( \arguments{ \item{obj}{The object to be saved, typically a Seurat object.} -\item{prefix}{A string prefix added to the filename. Default is NULL.} +\item{prefix}{A string prefix added to the filename. Default: NULL.} -\item{suffix}{A string suffix added to the filename. Default is NULL.} +\item{suffix}{A string suffix added to the filename. Default: NULL.} \item{inOutDir}{A boolean flag, if TRUE the OutDir is used as save directory, if FALSE the -alternative_path_rdata is used. Default is TRUE} +alternative_path_rdata is used. Default: \code{TRUE}.} \item{project}{A string representing the project code. This is appended to the saved file name. -Default is the active project determined by getProject().} +Default: the active project determined by getProject().} \item{alternative_path_rdata}{A string that specifies the alternative path for storing the -RDS file if inOutDir is FALSE. Default is "~/Dropbox (VBC)/Abel.IMBA/AnalysisD/_RDS.files/" +RDS file if inOutDir is FALSE. Default: "~/Dropbox (VBC)/Abel.IMBA/AnalysisD/_RDS.files/" appended with the basename of OutDir.} -\item{homepath}{A string representing the homepath. Will be replaced by '~' in the file path. Default is '~/'.} +\item{homepath}{A string representing the homepath. Will be replaced by '~' in the file path. Default: '~/'.} \item{showMemObject}{A boolean flag, if TRUE the function will print out the memory size of the -largest objects in the workspace. Default is TRUE.} +largest objects in the workspace. Default: \code{TRUE}.} \item{saveParams}{A boolean flag, if TRUE the parameters 'p' and 'all.genes' are added to the -'misc' slot of the Seurat object if the object is of class Seurat. Default is TRUE.} +'misc' slot of the Seurat object if the object is of class Seurat. Default: \code{TRUE}.} -\item{compress}{Compress .Rds file after writing? Default is TRUE.} +\item{compress}{Compress .Rds file after writing? Default: \code{TRUE}.} \item{test_read}{Provide command to test validity by reading in the object just written.} } diff --git a/man/isave.image.Rd b/man/isave.image.Rd index c0f3be7..2040f6d 100644 --- a/man/isave.image.Rd +++ b/man/isave.image.Rd @@ -15,12 +15,12 @@ isave.image( \item{...}{Additional parameters passed to the idate() function in the creation of the file name.} \item{path_rdata}{A string that specifies the path for storing the image of the workspace. -Default is "~/Dropbox/Abel.IMBA/AnalysisD/_Rdata.files/" appended with the basename of OutDir.} +Default: "~/Dropbox/Abel.IMBA/AnalysisD/_Rdata.files/" appended with the basename of OutDir.} \item{showMemObject}{A boolean flag, if TRUE the function will print out the memory size of the -largest objects in the workspace. Default is TRUE.} +largest objects in the workspace. Default: \code{TRUE}.} -\item{options}{A string for gzip options. Default is "--force".} +\item{options}{A string for gzip options. Default: "--force".} } \description{ Save an image of the current workspace using a faster and efficient compression diff --git a/man/load10Xv3.Rd b/man/load10Xv3.Rd index f5f6d32..08d5074 100644 --- a/man/load10Xv3.Rd +++ b/man/load10Xv3.Rd @@ -19,22 +19,22 @@ load10Xv3( This directory should include subdirectories for raw and filtered data, typically named starting with \code{raw_} and \code{filt_}, respectively.} -\item{cellIDs}{An optional vector of cell IDs to include in the loaded data. Default is \code{NULL}, +\item{cellIDs}{An optional vector of cell IDs to include in the loaded data. Default: \code{NULL}, indicating that all available cells will be included. This is useful for subsetting the data based on specific cell IDs.} \item{channelName}{An optional string specifying the channel name for the data being loaded. -This can be used to label the data according to the experimental condition or sample name. Default is \code{NULL}.} +This can be used to label the data according to the experimental condition or sample name. Default: \code{NULL}.} \item{readArgs}{A list of additional arguments to pass to the internal \code{Read10X} function used for -loading the data. Default is an empty list.} +loading the data. Default: an empty list.} \item{includeFeatures}{A character vector specifying which features to include in the loaded data. Common values include "Gene Expression", "Antibody Capture", and "CRISPR Guide Capture". -Default is \code{c("Gene Expression")}.} +Default: \code{c("Gene Expression")}.} \item{verbose}{A logical flag indicating whether to print progress messages and status updates as the -data is loaded. Default is \code{TRUE}.} +data is loaded. Default: \code{TRUE}.} \item{...}{Additional arguments passed to other internally called functions, if applicable.} } diff --git a/man/make10Xcellname.Rd b/man/make10Xcellname.Rd index 3481b06..7e0fde5 100644 --- a/man/make10Xcellname.Rd +++ b/man/make10Xcellname.Rd @@ -9,7 +9,7 @@ make10Xcellname(cellnames, suffix = "_1") \arguments{ \item{cellnames}{A vector of cell names without numeric suffixes.} -\item{suffix}{The suffix to add to each cell name. Default is '_1'.} +\item{suffix}{The suffix to add to each cell name. Default: '_1'.} } \value{ A vector of cell names with the specified suffix appended. diff --git a/man/multiFeaturePlot.A4.Rd b/man/multiFeaturePlot.A4.Rd index 1859ce6..0241926 100644 --- a/man/multiFeaturePlot.A4.Rd +++ b/man/multiFeaturePlot.A4.Rd @@ -38,12 +38,12 @@ multiFeaturePlot.A4( \item{obj}{Seurat object, Default: combined.obj} -\item{subdir}{Should plots be saved in a sub-directory? Default: TRUE} +\item{subdir}{Should plots be saved in a sub-directory? Default: \code{TRUE}.} \item{foldername}{Folder name to save the generated plots. Default: The name of the list of genes.} \item{subtitle.from.names}{Should the subtitle be extracted from the names of the gene symbols, -eg: \code{c("Astrocytes" = "AQP4")} ? Default: TRUE} +eg: \code{c("Astrocytes" = "AQP4")} ? Default: \code{TRUE}.} \item{plot.reduction}{Dimension reduction technique to use for plots. Default: 'umap'} @@ -71,7 +71,7 @@ eg: \code{c("Astrocytes" = "AQP4")} ? Default: TRUE} \item{aspect.ratio}{Should the aspect ratio be fixed? Default: Yes, at 0.6} -\item{saveGeneList}{Should the list of genes be saved? Default: FALSE} +\item{saveGeneList}{Should the list of genes be saved? Default: \code{FALSE}.} \item{w}{Width of the plot. Default: 8.27} diff --git a/man/plot.Gene.Cor.Heatmap.Rd b/man/plot.Gene.Cor.Heatmap.Rd index f4e7e14..6478131 100644 --- a/man/plot.Gene.Cor.Heatmap.Rd +++ b/man/plot.Gene.Cor.Heatmap.Rd @@ -18,22 +18,22 @@ ) } \arguments{ -\item{genes}{Vector of gene symbols to include in the correlation analysi.} +\item{genes}{Vector of gene symbols to include in the correlation analysis and heatmap.} \item{assay.use}{Assay from which to retrieve expression data within the Seurat object; Default: 'RNA'.} -\item{slot.use}{Specifies which slot of the assay to use for expression data ('data', 'scale.data', 'data.imputed'); -Default: first item ('data').} +\item{slot.use}{Specifies which slot of the assay to use for expression data \verb{('data', 'scale.data', 'data.imputed')}; +Default: first item \code{('data')}.} -\item{quantileX}{Quantile level for calculating expression thresholds; Default: 0.95.} +\item{quantileX}{Quantile level for calculating expression thresholds; Default: \code{0.95}.} -\item{min.g.cor}{Minimum absolute gene correlation value for inclusion in the heatmap; Default: 0.3.} +\item{min.g.cor}{Minimum absolute gene correlation value for inclusion in the heatmap; Default: \code{0.3}.} -\item{calc.COR}{Logical flag to calculate correlation matrix if not found in \verb{@misc}; Default: FALSE.} +\item{calc.COR}{Logical flag to calculate correlation matrix if not found in \verb{@misc}; Default: \code{FALSE.}} -\item{cutRows}{Height at which to cut the dendrogram for rows, determining cluster formation; Default: NULL.} +\item{cutRows}{Height at which to cut the dendrogram for rows, determining cluster formation; Default: \code{NULL.}} -\item{cutCols}{Height at which to cut the dendrogram for columns, determining cluster formation; +\item{cutCols}{Height at which to cut the dendrogram for columns, determining cluster formation. Default: same as \code{cutRows}.} \item{obj}{Seurat object containing the data; Default: \code{combined.obj}.} diff --git a/man/plot.UMAP.tSNE.sidebyside.Rd b/man/plot.UMAP.tSNE.sidebyside.Rd index c626492..2bec09f 100644 --- a/man/plot.UMAP.tSNE.sidebyside.Rd +++ b/man/plot.UMAP.tSNE.sidebyside.Rd @@ -27,19 +27,19 @@ \item{grouping}{Variable to group cells by. Default: 'res.0.6'} -\item{no_legend}{Logical, whether to display legend. Default: FALSE} +\item{no_legend}{Logical, whether to display legend. Default: \code{FALSE}.} -\item{do_return}{Logical, whether to return plot object. Default: TRUE} +\item{do_return}{Logical, whether to return plot object. Default: \code{TRUE}.} -\item{do_label}{Logical, whether to display labels. Default: TRUE} +\item{do_label}{Logical, whether to display labels. Default: \code{TRUE}.} \item{label_size}{Size of labels. Default: 10} -\item{vector_friendly}{Logical, whether to optimize for vector outputs. Default: TRUE} +\item{vector_friendly}{Logical, whether to optimize for vector outputs. Default: \code{TRUE}.} \item{cells_use}{A vector of cell names to use for the plot. Default: NULL} -\item{no_axes}{Logical, whether to hide axes. Default: TRUE} +\item{no_axes}{Logical, whether to hide axes. Default: \code{TRUE}.} \item{pt_size}{Size of points. Default: 0.5} diff --git a/man/plot.expression.rank.q90.Rd b/man/plot.expression.rank.q90.Rd index 925a25b..6da3b2f 100644 --- a/man/plot.expression.rank.q90.Rd +++ b/man/plot.expression.rank.q90.Rd @@ -7,11 +7,11 @@ \method{plot}{expression.rank.q90}(obj = combined.obj, gene = "ACTB", filterZero = TRUE) } \arguments{ -\item{obj}{Seurat object, Default: combined.obj} +\item{obj}{Seurat object, Default: \code{combined.obj}} \item{gene}{gene of interest, Default: 'ACTB'} -\item{filterZero}{Remove genes whose quantile-90 expression in 0? Default: TRUE} +\item{filterZero}{Remove genes whose quantile-90 expression in 0? Default: \code{TRUE}.} } \description{ Plot gene expression based on the expression at the 90th quantile diff --git a/man/plot3D.umap.Rd b/man/plot3D.umap.Rd index 5fdec41..b290562 100644 --- a/man/plot3D.umap.Rd +++ b/man/plot3D.umap.Rd @@ -6,7 +6,7 @@ \usage{ plot3D.umap( obj = combined.obj, - category = GetNamedClusteringRuns(obj = obj, v = F)[1], + category = GetNamedClusteringRuns(obj = obj, v = FALSE)[1], annotate.by = category, suffix = NULL, dotsize = 1.25, diff --git a/man/plot3D.umap.gene.Rd b/man/plot3D.umap.gene.Rd index 6b90d6a..5373c56 100644 --- a/man/plot3D.umap.gene.Rd +++ b/man/plot3D.umap.gene.Rd @@ -7,7 +7,7 @@ plot3D.umap.gene( gene = "TOP2A", obj = combined.obj, - annotate.by = GetNamedClusteringRuns(obj = obj, v = F)[1], + annotate.by = GetNamedClusteringRuns(obj = obj, v = FALSE)[1], quantileCutoff = 0.99, def.assay = c("integrated", "RNA")[2], suffix = NULL, diff --git a/man/plotClustSizeDistr.Rd b/man/plotClustSizeDistr.Rd index 639ce78..05fcbf6 100644 --- a/man/plotClustSizeDistr.Rd +++ b/man/plotClustSizeDistr.Rd @@ -12,7 +12,7 @@ plotClustSizeDistr(obj = combined.obj, ident, plot = TRUE, thr.hist = 30, ...) \item{ident}{Clustering identity to base the plot on. Default: The second entry from \code{GetClusteringRuns()}.} -\item{plot}{Whether to display the plot (TRUE) or return cluster sizes (FALSE). Default: TRUE.} +\item{plot}{Whether to display the plot (TRUE) or return cluster sizes (FALSE). Default: \code{TRUE}.} \item{thr.hist}{Threshold for switching from a bar plot to a histogram based on the number of clusters. Default: 30.} diff --git a/man/plotGeneExprHistAcrossCells.Rd b/man/plotGeneExprHistAcrossCells.Rd index a45ae12..868d9d5 100644 --- a/man/plotGeneExprHistAcrossCells.Rd +++ b/man/plotGeneExprHistAcrossCells.Rd @@ -31,6 +31,8 @@ plotGeneExprHistAcrossCells( \item{assay}{Assay to use from the Seurat object; Default: "RNA".} +\item{layerX}{Data slot to use ('data' or 'counts'); Default: "data".} + \item{thr_expr}{Expression threshold for highlighting in the plot; Default: 10.} \item{suffix}{Additional text to append to the plot title; Default: NULL.} @@ -39,24 +41,22 @@ plotGeneExprHistAcrossCells( \item{xlab}{Label for the x-axis; Default: "log10(Summed UMI count @data)".} -\item{return_cells_passing}{If TRUE, returns count of cells exceeding the expression threshold; Default: TRUE.} +\item{return_cells_passing}{If TRUE, returns count of cells exceeding the expression threshold; Default: \code{TRUE}.} \item{clip_count_qtl_thr}{Quantile threshold for clipping if using count data; Default: 0.95. Needed for visualization (to avoid x axis compression).} -\item{log10_counts}{If TRUE, log10-transforms the COUNT expression values; Default: TRUE.} +\item{log10_counts}{If TRUE, log10-transforms the COUNT expression values; Default: \code{TRUE}.} -\item{return_quantile}{If TRUE, returns cell count exceeding the quantile threshold; Default: FALSE.} +\item{return_quantile}{If TRUE, returns cell count exceeding the quantile threshold; Default: \code{FALSE}.} \item{w}{Width of the plot in inches; Default: 9.} \item{h}{Height of the plot in inches; Default: 5.} -\item{show_plot}{If TRUE, displays the generated plot; Default: TRUE.} +\item{show_plot}{If TRUE, displays the generated plot; Default: \code{TRUE}.} \item{...}{Additional arguments for customization.} - -\item{slot_}{Data slot to use ('data' or 'counts'); Default: "data".} } \value{ Depending on the parameters, can return a ggplot object, the number of cells passing diff --git a/man/plotMetadataCategPie.Rd b/man/plotMetadataCategPie.Rd index e2a2de1..de35b39 100644 --- a/man/plotMetadataCategPie.Rd +++ b/man/plotMetadataCategPie.Rd @@ -31,6 +31,8 @@ and the count of each category. If \code{FALSE}, only the percentage is shown.} \item{subtitle}{Optional subtitle for the pie chart.} +\item{labels}{Optional labels for the pie chart.} + \item{...}{Additional arguments to pass to the pie chart plotting function.} } \value{ diff --git a/man/plotMetadataCorHeatmap.Rd b/man/plotMetadataCorHeatmap.Rd index 39745eb..110cbec 100644 --- a/man/plotMetadataCorHeatmap.Rd +++ b/man/plotMetadataCorHeatmap.Rd @@ -31,13 +31,13 @@ Default: c("nCount_RNA", "nFeature_RNA", "percent.mito", "percent.ribo").} \item{main}{The main title for the plot. Default: "Metadata correlations" followed by the correlation method.} -\item{show_numbers}{Logical, determines if correlation values should be displayed on the plot. Default: FALSE.} +\item{show_numbers}{Logical, determines if correlation values should be displayed on the plot. Default: \code{FALSE}.} \item{digits}{The number of decimal places for displayed correlation values. Default: 1.} \item{suffix}{A suffix added to the output filename. Default: NULL.} -\item{add_PCA}{Logical, determines if PCA values should be included in the correlation calculation. Default: TRUE.} +\item{add_PCA}{Logical, determines if PCA values should be included in the correlation calculation. Default: \code{TRUE}.} \item{n_PCs}{The number of PCA components to be included if 'add_PCA' is TRUE. Default: 8.} @@ -45,7 +45,7 @@ Default: c("nCount_RNA", "nFeature_RNA", "percent.mito", "percent.ribo").} \item{h}{The height of the plot in inches. Default: the value of w.} -\item{use_ggcorrplot}{Logical, determines if the ggcorrplot package should be used for plotting. Default: FALSE.} +\item{use_ggcorrplot}{Logical, determines if the ggcorrplot package should be used for plotting. Default: \code{FALSE}.} \item{n_cutree}{The number of clusters to be used in hierarchical clustering. Default: the number of PCs.} diff --git a/man/plotMetadataMedianFractionBarplot.Rd b/man/plotMetadataMedianFractionBarplot.Rd index 7a72c99..242c15c 100644 --- a/man/plotMetadataMedianFractionBarplot.Rd +++ b/man/plotMetadataMedianFractionBarplot.Rd @@ -33,13 +33,13 @@ plotMetadataMedianFractionBarplot( \item{min.thr}{Minimum threshold percentage for a cluster. Default: 2.5.} -\item{return.matrix}{Logical; if TRUE, returns a matrix. Default: FALSE.} +\item{return.matrix}{Logical; if TRUE, returns a matrix. Default: \code{FALSE}.} \item{main}{Main title for the plot. Default: "read fractions per transcript class and cluster" followed by the method and suffix.} \item{ylab}{Label for the y-axis. Default: "Fraction of transcriptome (\%)".} -\item{percentify}{Logical. If TRUE, multiplies the fraction by 100. Default: TRUE.} +\item{percentify}{Logical. If TRUE, multiplies the fraction by 100. Default: \code{TRUE}.} \item{subt}{Subtitle for the plot. Default: NULL.} diff --git a/man/plotTheSoup.Rd b/man/plotTheSoup.Rd index 11afadf..ad193e2 100644 --- a/man/plotTheSoup.Rd +++ b/man/plotTheSoup.Rd @@ -8,7 +8,7 @@ plotTheSoup( CellRanger_outs_Dir = "~/Data/114593/114593", library_name = basename(gsub("/outs", "", CellRanger_outs_Dir)), out_dir_prefix = "SoupStatistics", - add_custom_class = F, + add_custom_class = FALSE, pattern_custom = "\\\\.RabV$", ls.Alpha = 1 ) @@ -20,9 +20,11 @@ plotTheSoup( \item{out_dir_prefix}{Prefix for the output directory. Default: 'SoupStatistics'} -\item{add_custom_class}{Add a custom class of genes, matched by apattern in gene symbol. Default: TRUE} +\item{add_custom_class}{Add a custom class of genes, matched by apattern in gene symbol. Default: \code{TRUE}.} -\item{pattern_custom}{The pattern to match in gene symbol. Default: NA} +\item{pattern_custom}{The pattern to match in gene symbol. Default: \code{NA}.} + +\item{ls.Alpha}{The alpha value for the label text. Default: 0.5.} } \description{ Plot stats about the ambient RNA content in a 10X experiment. diff --git a/man/processSeuratObject.Rd b/man/processSeuratObject.Rd index a203d11..b389e33 100644 --- a/man/processSeuratObject.Rd +++ b/man/processSeuratObject.Rd @@ -25,13 +25,13 @@ processSeuratObject( \item{param.list}{A list of parameters used in the processing steps.} -\item{add.meta.fractions}{A boolean indicating whether to add meta data for fractions of cells in each cluster. Default: FALSE.} +\item{add.meta.fractions}{A boolean indicating whether to add meta data for fractions of cells in each cluster. Default: \code{FALSE}.} -\item{compute}{A boolean indicating whether to compute the results. Default: TRUE.} +\item{compute}{A boolean indicating whether to compute the results. Default: \code{TRUE}.} -\item{save}{A boolean indicating whether to save the results. Default: TRUE.} +\item{save}{A boolean indicating whether to save the results. Default: \code{TRUE}.} -\item{plot}{A boolean indicating whether to plot the results. Default: TRUE.} +\item{plot}{A boolean indicating whether to plot the results. Default: \code{TRUE}.} \item{nfeatures}{The number of variable genes to use. Default: 2000.} @@ -44,6 +44,8 @@ processSeuratObject( \item{reduction_input}{The reduction method to use as input for clustering & UMAP. Default: "pca".} \item{WorkingDir}{The working directory to save the results. Default: getwd().} + +\item{...}{Additional parameters to be passed to \code{ScaleData()}.} } \value{ A Seurat object after applying scaling, PCA, UMAP, neighbor finding, and clustering. diff --git a/man/qMarkerCheck.BrainOrg.Rd b/man/qMarkerCheck.BrainOrg.Rd index bfad151..4ce6e99 100644 --- a/man/qMarkerCheck.BrainOrg.Rd +++ b/man/qMarkerCheck.BrainOrg.Rd @@ -10,7 +10,7 @@ qMarkerCheck.BrainOrg(obj = combined.obj, custom.genes = FALSE, suffix = "") \item{obj}{Seurat object for visualization; Default: \code{combined.obj}.} \item{custom.genes}{Logical indicating whether to use a custom set of genes. -If FALSE, a predefined list of key brain organoid markers is used; Default: FALSE.} +If FALSE, a predefined list of key brain organoid markers is used; Default: \code{FALSE}.} \item{suffix}{Suffix for the folder name where the plots are saved; Default: "".} } diff --git a/man/qSeuViolin.Rd b/man/qSeuViolin.Rd index 0500451..ab4bb91 100644 --- a/man/qSeuViolin.Rd +++ b/man/qSeuViolin.Rd @@ -7,7 +7,7 @@ qSeuViolin( obj, feature = "nFeature_RNA", - ident = GetNamedClusteringRuns(obj = obj, v = F)[1], + ident = GetNamedClusteringRuns(obj = obj, v = FALSE)[1], split.by = NULL, colors = NULL, clip.outliers = TRUE, @@ -42,11 +42,13 @@ qSeuViolin( \item{colors}{A character vector specifying the colors to use for the plot.} +\item{clip.outliers}{A logical indicating whether to clip outliers.} + \item{replace.na}{A logical indicating whether NA values should be replaced.} \item{pt.size}{The size of the individual datapoints in the plot. Set to 0 to get a clean violin plot.} -\item{sub}{Subtitle of the plot. Default is feature by ident.} +\item{sub}{Subtitle of the plot. Default: feature by ident.} \item{suffix}{An optional string to append to the title of the plot.} @@ -58,7 +60,7 @@ qSeuViolin( \item{hline}{A numeric or logical value; if numeric, the value where a horizontal line should be drawn.} -\item{ylab}{Y-axis label. Default is "Expression".} +\item{ylab}{Y-axis label. Default: "Expression".} \item{ylimit}{A numeric vector specifying the limits of the y-axis.} @@ -70,6 +72,8 @@ qSeuViolin( \item{show_plot}{A logical indicating whether to display the plot.} +\item{grid}{A logical indicating whether to display grid lines.} + \item{w}{Width of the plot.} \item{h}{Height of the plot.} diff --git a/man/qUMAP.Rd b/man/qUMAP.Rd index deafe8a..82ec83b 100644 --- a/man/qUMAP.Rd +++ b/man/qUMAP.Rd @@ -47,9 +47,9 @@ qUMAP( \item{suffix}{Suffix added to the end of the filename; Default: \code{sub}.} -\item{save.plot}{If TRUE, the plot is saved into a file; Default: TRUE.} +\item{save.plot}{If TRUE, the plot is saved into a file; Default: \code{TRUE}.} -\item{PNG}{If TRUE, the file is saved as a .png; Default: TRUE.} +\item{PNG}{If TRUE, the file is saved as a .png; Default: \code{TRUE}.} \item{h}{Height of the plot in inches; Default: 7.} @@ -59,15 +59,15 @@ qUMAP( \item{assay}{Which assay to use ('RNA' or 'integrated'); Default: 'RNA'.} -\item{axes}{If TRUE, axes are shown on the plot; Default: FALSE.} +\item{axes}{If TRUE, axes are shown on the plot; Default: \code{FALSE}.} -\item{aspect.ratio}{Ratio of height to width. If TRUE, the ratio is fixed at 0.6; Default: FALSE.} +\item{aspect.ratio}{Ratio of height to width. If TRUE, the ratio is fixed at 0.6; Default: \code{FALSE}.} -\item{HGNC.lookup}{If TRUE, HGNC gene symbol lookup is performed; Default: TRUE.} +\item{HGNC.lookup}{If TRUE, HGNC gene symbol lookup is performed; Default: \code{TRUE}.} -\item{make.uppercase}{If TRUE, feature names are converted to uppercase; Default: FALSE.} +\item{make.uppercase}{If TRUE, feature names are converted to uppercase; Default: \code{FALSE}.} -\item{check_for_2D}{If TRUE, checks if UMAP is 2 dimensional; Default: TRUE.} +\item{check_for_2D}{If TRUE, checks if UMAP is 2 dimensional; Default: \code{TRUE}.} \item{qlow}{Lower quantile for the color scale; Default: 'q10'.} diff --git a/man/qsave.image.Rd b/man/qsave.image.Rd index 5c7e352..2b52171 100644 --- a/man/qsave.image.Rd +++ b/man/qsave.image.Rd @@ -9,7 +9,10 @@ qsave.image(..., showMemObject = TRUE, options = c("--force", NULL)[1]) \arguments{ \item{...}{Pass any other parameter to the internally called functions (most of them should work).} -\item{options}{Options passed on to gzip, via CLI. Default: c("--force", NULL)\link{1}} +\item{showMemObject}{Logical; if TRUE, the function will print out the memory size of the largest +objects in the workspace. Default: \code{TRUE}.} + +\item{options}{Options passed on to gzip, via CLI. Default: \code{c("--force", NULL)[1]}} } \description{ Faster saving of workspace, and compression outside R, when it can run in the background. diff --git a/man/recall.all.genes.Rd b/man/recall.all.genes.Rd index d8ae8f8..e1c2810 100644 --- a/man/recall.all.genes.Rd +++ b/man/recall.all.genes.Rd @@ -7,7 +7,7 @@ recall.all.genes(obj = combined.obj, overwrite = FALSE) } \arguments{ -\item{obj}{Seurat object, Default: combined.obj} +\item{obj}{Seurat object, Default: \code{combined.obj}} } \description{ all.genes set by calc.q99.Expression.and.set.all.genes() # diff --git a/man/recall.genes.ls.Rd b/man/recall.genes.ls.Rd index 76820b2..afedfe4 100644 --- a/man/recall.genes.ls.Rd +++ b/man/recall.genes.ls.Rd @@ -7,9 +7,9 @@ recall.genes.ls(obj = combined.obj, overwrite = FALSE) } \arguments{ -\item{obj}{Seurat object, Default: combined.obj} +\item{obj}{Seurat object, Default: \code{combined.obj}} -\item{overwrite}{Overwrite already existing in environment? Default: FALSE} +\item{overwrite}{Overwrite already existing in environment? Default: \code{FALSE}.} } \description{ Recall genes.ls from obj@misc to "genes.ls" in the global environment. diff --git a/man/recall.meta.tags.n.datasets.Rd b/man/recall.meta.tags.n.datasets.Rd index 73b7498..cfbc169 100644 --- a/man/recall.meta.tags.n.datasets.Rd +++ b/man/recall.meta.tags.n.datasets.Rd @@ -7,7 +7,7 @@ recall.meta.tags.n.datasets(obj = combined.obj) } \arguments{ -\item{obj}{Seurat object, Default: combined.obj} +\item{obj}{Seurat object, Default: \code{combined.obj}} } \description{ Recall meta.tags from obj@misc to "meta.tags" in the global environment. diff --git a/man/recall.parameters.Rd b/man/recall.parameters.Rd index 252c65a..7617015 100644 --- a/man/recall.parameters.Rd +++ b/man/recall.parameters.Rd @@ -7,9 +7,9 @@ recall.parameters(obj = combined.obj, overwrite = FALSE) } \arguments{ -\item{obj}{Seurat object, Default: combined.obj} +\item{obj}{Seurat object, Default: \code{combined.obj}} -\item{overwrite}{Overwrite already existing in environment? Default: FALSE} +\item{overwrite}{Overwrite already existing in environment? Default: \code{FALSE}.} } \description{ Recall parameters from obj@misc to "p" in the global environment. diff --git a/man/regress_out_and_recalculate_seurat.Rd b/man/regress_out_and_recalculate_seurat.Rd index 8c346c3..b9b3554 100644 --- a/man/regress_out_and_recalculate_seurat.Rd +++ b/man/regress_out_and_recalculate_seurat.Rd @@ -21,25 +21,25 @@ regress_out_and_recalculate_seurat( \arguments{ \item{obj}{The Seurat object.} -\item{n.var.features}{The number of variable features to use. Default is the 'n.var.genes' element from a list 'p'.} +\item{n.var.features}{The number of variable features to use. Default: the 'n.var.genes' element from a list 'p'.} -\item{features.scale}{A logical value indicating whether to scale the data. Default is TRUE.} +\item{features.scale}{A logical value indicating whether to scale the data. Default: \code{TRUE}.} \item{vars.to.regress}{A vector of variable names to be regressed out.} \item{suffix}{A character string to be used as a suffix when saving the object.} -\item{nPCs}{The number of principal components to use. Default is the 'n.PC' element from a list 'p'.} +\item{nPCs}{The number of principal components to use. Default: the 'n.PC' element from a list 'p'.} -\item{clust_resolutions}{The resolution for clustering. Default is the 'snn_res' element from a list 'p'.} +\item{clust_resolutions}{The resolution for clustering. Default: the 'snn_res' element from a list 'p'.} -\item{calc_tSNE}{Logical, if TRUE, t-SNE will be performed. Default is FALSE.} +\item{calc_tSNE}{Logical, if TRUE, t-SNE will be performed. Default: \code{FALSE}.} -\item{plot_umaps}{Logical, if TRUE, UMAP plots will be generated. Default is TRUE.} +\item{plot_umaps}{Logical, if TRUE, UMAP plots will be generated. Default: \code{TRUE}.} -\item{save_obj}{Logical, if TRUE, the object will be saved. Default is TRUE.} +\item{save_obj}{Logical, if TRUE, the object will be saved. Default: \code{TRUE}.} -\item{assayX}{The assay to be used in scaling data. Default is 'RNA'.} +\item{assayX}{The assay to be used in scaling data. Default: 'RNA'.} } \value{ Seurat object after calculations and manipulations. diff --git a/man/removeCellsByUmap.Rd b/man/removeCellsByUmap.Rd index e01a366..203d584 100644 --- a/man/removeCellsByUmap.Rd +++ b/man/removeCellsByUmap.Rd @@ -16,16 +16,16 @@ removeCellsByUmap( } \arguments{ \item{reduction}{A string specifying the dimension reduction technique to be used -('umap', 'pca', or 'tsne'). Default is 'umap'.} +('umap', 'pca', or 'tsne'). Default: 'umap'.} -\item{umap_dim}{An integer specifying which dimension (axis) to apply the cutoff. Default is 1.} +\item{umap_dim}{An integer specifying which dimension (axis) to apply the cutoff. Default: 1.} -\item{obj}{A Seurat object. Default is 'combined.obj'.} +\item{obj}{A Seurat object. Default: 'combined.obj'.} -\item{cutoff}{A numerical value indicating the cutoff value for the specified dimension. Default is 0.} +\item{cutoff}{A numerical value indicating the cutoff value for the specified dimension. Default: 0.} \item{cut_below}{A logical value indicating whether to remove cells below (TRUE) or -above (FALSE) the cutoff line. Default is TRUE.} +above (FALSE) the cutoff line. Default: \code{TRUE}.} \item{only_plot_cutoff}{Simulate and plot cutoff only.} diff --git a/man/removeClustersAndDropLevels.Rd b/man/removeClustersAndDropLevels.Rd index b81cde4..cbb2c4f 100644 --- a/man/removeClustersAndDropLevels.Rd +++ b/man/removeClustersAndDropLevels.Rd @@ -15,7 +15,7 @@ removeClustersAndDropLevels( \item{ls_obj}{A list of Seurat objects.} \item{object_names}{A character vector containing the names of the Seurat objects to process. -Default is names of all objects in the \code{ls_obj}.} +Default: names of all objects in the \code{ls_obj}.} \item{indices}{A numeric vector indicating which datasets to process by their position in the \code{object_names} vector. By default, it processes the second and third datasets.} diff --git a/man/removeLayersByPattern.Rd b/man/removeLayersByPattern.Rd index 27d6bc0..876065d 100644 --- a/man/removeLayersByPattern.Rd +++ b/man/removeLayersByPattern.Rd @@ -7,9 +7,12 @@ removeLayersByPattern(obj, pattern = "sc[0-9][0-9]_", perl = TRUE) } \arguments{ +\item{obj}{A Seurat object.} + \item{pattern}{A regular expression pattern to match layer names.} -\item{seuratObj}{A Seurat object.} +\item{perl}{A logical value indicating whether to use Perl-compatible regular expressions. +Default: \code{TRUE}.} } \value{ A Seurat object with specified layers removed. diff --git a/man/runDGEA.Rd b/man/runDGEA.Rd index cdf2e8a..9cf3682 100644 --- a/man/runDGEA.Rd +++ b/man/runDGEA.Rd @@ -41,7 +41,7 @@ runDGEA( Default: c(0.1).} \item{reorder.clusters}{Logical indicating whether to reorder clusters based on dimension. -Default: TRUE.} +Default: \code{TRUE}.} \item{reorder.dimension}{Integer specifying the dimension for reordering (1 for x, -1 for y). Default: 1.} @@ -55,37 +55,37 @@ Default: OutDir} the base directory. Default: "DGEA + date".} \item{add.combined.score}{Logical indicating whether to add a combined score to the markers. -Default: TRUE.} +Default: \code{TRUE}.} \item{save.obj}{Logical indicating whether to save the modified Seurat object. -Default: TRUE.} +Default: \code{TRUE}.} \item{calculate.DGEA}{Logical determining if the DE analysis should be calculated. -Default: TRUE.} +Default: \code{TRUE}.} \item{plot.DGEA}{Logical determining if results should be plotted. -Default: TRUE.} +Default: \code{TRUE}.} \item{umap_caption}{Character string specifying the caption for the UMAP plot. Default: "".} \item{plot.av.enrichment.hist}{Logical indicating whether to plot the average enrichment histogram. -Default: TRUE.} +Default: \code{TRUE}.} \item{plot.log.top.gene.stats}{Logical indicating whether to plot the log top gene statistics.} \item{auto.cluster.naming}{Logical indicating automatic labeling of clusters. -Default: TRUE.} +Default: \code{TRUE}.} \item{clean.misc.slot}{Logical indicating whether to clean the misc slots of previous -clustering results. Default: TRUE.} +clustering results. Default: \code{TRUE}.} \item{clean.meta.data}{Logical indicating whether to clean the metadata slots of -previous clustering results. Default: TRUE.} +previous clustering results. Default: \code{TRUE}.} \item{n.cores}{Integer specifying the number of cores to use for parallel processing (multisession). Default: 1.} -\item{presto}{Logical indicating whether to use presto for DE analysis. Default: TRUE.} +\item{presto}{Logical indicating whether to use presto for DE analysis. Default: \code{TRUE}.} \item{WorkingDir}{Character string specifying the working directory. Default: getwd().} } diff --git a/man/save.parameters.Rd b/man/save.parameters.Rd index 2b41296..f5443ed 100644 --- a/man/save.parameters.Rd +++ b/man/save.parameters.Rd @@ -11,7 +11,7 @@ save.parameters(obj = combined.obj, params = p, overwrite = TRUE) \item{params}{List of parameters to save; Default: \code{p}.} -\item{overwrite}{Logical indicating if existing parameters should be overwritten; Default: TRUE.} +\item{overwrite}{Logical indicating if existing parameters should be overwritten; Default: \code{TRUE}.} } \description{ Stores a list of parameters within the \verb{@misc$p} slot of a Seurat object, diff --git a/man/save2plots.A4.Rd b/man/save2plots.A4.Rd index 9e21642..bc06a6f 100644 --- a/man/save2plots.A4.Rd +++ b/man/save2plots.A4.Rd @@ -20,7 +20,7 @@ save2plots.A4( \item{plot_list}{A list containing ggplot objects to be arranged and saved.} \item{pname}{Boolean indicating if the plot name should be automatically generated; -if FALSE, the name is based on \code{plot_list} and \code{suffix}; Default: FALSE.} +if FALSE, the name is based on \code{plot_list} and \code{suffix}; Default: \code{FALSE}.} \item{suffix}{Suffix to be added to the generated filename if \code{pname} is FALSE; Default: NULL.} diff --git a/man/save4plots.A4.Rd b/man/save4plots.A4.Rd index f5226d3..9e64712 100644 --- a/man/save4plots.A4.Rd +++ b/man/save4plots.A4.Rd @@ -19,7 +19,7 @@ save4plots.A4( \arguments{ \item{plot_list}{A list containing ggplot objects to be arranged and saved; each object represents one panel.} -\item{pname}{Plot name; if FALSE, a name is generated automatically based on \code{plot_list} and \code{suffix}; Default: FALSE.} +\item{pname}{Plot name; if FALSE, a name is generated automatically based on \code{plot_list} and \code{suffix}; Default: \code{FALSE}.} \item{suffix}{Suffix to be added to the filename; Default: NULL.} diff --git a/man/scBarplot.CellFractions.Rd b/man/scBarplot.CellFractions.Rd index 422fa0c..3311326 100644 --- a/man/scBarplot.CellFractions.Rd +++ b/man/scBarplot.CellFractions.Rd @@ -20,8 +20,6 @@ scBarplot.CellFractions( save_plot = TRUE, also.pdf = FALSE, seedNr = 1989, - w = NULL, - h = 6, draw_plot = TRUE, show_numbers = FALSE, min.pct = 0.05, @@ -35,6 +33,8 @@ scBarplot.CellFractions( cex.total = 2, xlab.angle = 45, show_plot = TRUE, + w = NULL, + h = 6, ... ) } @@ -43,6 +43,8 @@ scBarplot.CellFractions( \item{group.by}{The variable to group by for the bar plot.} +\item{obj}{A Seurat object.} + \item{downsample}{Logical indicating whether to downsample data to equalize group sizes.} \item{min.nr.sampled.cells}{The minimal number of cells to sample from each identity class. Defaults to 200 cells.} @@ -51,22 +53,22 @@ scBarplot.CellFractions( \item{suffix}{Optional suffix for the plot title.} +\item{prefix}{Optional prefix for the plot title.} + \item{sub_title}{Optional subtitle for the plot.} \item{hlines}{Numeric vector specifying y-intercepts of horizontal lines to add to the plot.} \item{return_table}{Logical; if TRUE, returns a contingency table instead of plotting.} +\item{save_table}{Logical; if TRUE, saves the table behind the plot.} + \item{save_plot}{Logical; if TRUE, saves the generated plot.} \item{also.pdf}{Save plot in both png and pdf formats.} \item{seedNr}{Seed for random number generation to ensure reproducibility.} -\item{w}{Width of the plot in inches.} - -\item{h}{Height of the plot in inches.} - \item{draw_plot}{Logical; if FALSE, suppresses plotting (useful if only the table is desired).} \item{show_numbers}{Logical; if TRUE, adds count numbers on top of each bar in the plot.} @@ -86,9 +88,15 @@ are aggregated into an "Other" category.} \item{cex.total}{Label size for total cells} -\item{...}{Additional parameters passed to internally called functions.} +\item{xlab.angle}{Angle of x-axis labels.} + +\item{show_plot}{Logical; if TRUE, shows the plot.} -\item{dsample.to.repl.thr}{Logical indicating if sampling should be done with replacement. Defaults to FALSE.} +\item{w}{Width of the plot in inches. Default: \code{NULL}} + +\item{h}{Height of the plot in inches. Default: \code{6}} + +\item{...}{Additional parameters passed to internally called functions.} } \value{ Depending on the value of \code{return_table}, either returns a ggplot object or a list diff --git a/man/scBarplot.CellsPerCluster.Rd b/man/scBarplot.CellsPerCluster.Rd index 9bc1e91..6942b66 100644 --- a/man/scBarplot.CellsPerCluster.Rd +++ b/man/scBarplot.CellsPerCluster.Rd @@ -25,17 +25,17 @@ scBarplot.CellsPerCluster( \item{ident}{Cluster identity. Used to specify which clustering results to visualize. Default: First entry from ordered clustering runs.} -\item{sort}{If TRUE, sorts clusters by size. Default: FALSE.} +\item{sort}{If TRUE, sorts clusters by size. Default: \code{FALSE}.} \item{sub}{Subtitle for the plot. Default: "identity".} -\item{label}{If TRUE, shows cell count or percentage based on the label vector. Default: TRUE.} +\item{label}{If TRUE, shows cell count or percentage based on the label vector. Default: \code{TRUE}.} \item{suffix}{Optional suffix for file naming. Used in conjunction with \code{kpp}.} \item{palette}{Color palette for the barplot. Default: 'glasbey'.} -\item{return_table}{If TRUE, returns the data used for plotting instead of the plot itself. Default: FALSE.} +\item{return_table}{If TRUE, returns the data used for plotting instead of the plot itself. Default: \code{FALSE}.} \item{ylab_adj}{Adjustment factor for y-axis label positioning. Default: 1.1.} diff --git a/man/scBarplot.CellsPerObject.Rd b/man/scBarplot.CellsPerObject.Rd index 8b64d8e..0ce0d2b 100644 --- a/man/scBarplot.CellsPerObject.Rd +++ b/man/scBarplot.CellsPerObject.Rd @@ -20,7 +20,7 @@ scBarplot.CellsPerObject( \item{xlab.angle}{Angle for x-axis labels, enhancing readability. Default: 45.} \item{names}{Optionally provide custom names for x-axis labels. If FALSE, uses object names -from \code{ls.obj}. Default: FALSE.} +from \code{ls.obj}. Default: \code{FALSE}.} \item{...}{Extra parameters passed to \code{qbarplot}.} } diff --git a/man/scBarplot.FractionAboveThr.Rd b/man/scBarplot.FractionAboveThr.Rd index 5d878fe..28ff9e4 100644 --- a/man/scBarplot.FractionAboveThr.Rd +++ b/man/scBarplot.FractionAboveThr.Rd @@ -29,13 +29,13 @@ scBarplot.FractionAboveThr( \item{subtitle}{Optional subtitle for the plot.} -\item{return.df}{Whether to return the underlying data frame instead of the plot. Default: FALSE.} +\item{return.df}{Whether to return the underlying data frame instead of the plot. Default: \code{FALSE}.} \item{label}{Whether to add labels to the bar plot. Default: NULL.} \item{suffix}{Suffix for the output file name.} -\item{above}{Whether to calculate the fraction of cells above or below the threshold. Default: TRUE.} +\item{above}{Whether to calculate the fraction of cells above or below the threshold. Default: \code{TRUE}.} \item{...}{Additional parameters for plotting functions.} } diff --git a/man/scBarplot.FractionBelowThr.Rd b/man/scBarplot.FractionBelowThr.Rd index b7822ed..abfe0a7 100644 --- a/man/scBarplot.FractionBelowThr.Rd +++ b/man/scBarplot.FractionBelowThr.Rd @@ -24,7 +24,7 @@ scBarplot.FractionBelowThr( \item{obj}{Seurat object with cell data. Default: \code{combined.obj}.} -\item{return.df}{If TRUE, returns the data frame instead of the plot. Default: FALSE.} +\item{return.df}{If TRUE, returns the data frame instead of the plot. Default: \code{FALSE}.} } \description{ Generates a bar plot to visualize the percentage of cells within each cluster that diff --git a/man/scBarplotEnrichr.Rd b/man/scBarplotEnrichr.Rd index 6385113..524f42f 100644 --- a/man/scBarplotEnrichr.Rd +++ b/man/scBarplotEnrichr.Rd @@ -15,7 +15,7 @@ scBarplotEnrichr( save = TRUE, w = 10, h = 10, - also.pdf = F, + also.pdf = FALSE, ... ) } @@ -32,7 +32,13 @@ scBarplotEnrichr( \item{caption}{Character. Caption of the plot. Default: constructed from input parameters.} -\item{save}{Logical. Whether to save the plot to a file. Default: TRUE.} +\item{save}{Logical. Whether to save the plot to a file. Default: \code{TRUE}.} + +\item{w}{Width of the plot canvas, calculated as the width of an A4 page times \code{scale}; Default: \code{11.69 * scale}.} + +\item{h}{Height of the plot canvas, calculated as the height of an A4 page times \code{scale}; Default: \code{8.27 * scale}.} + +\item{also.pdf}{Save plot in both png and pdf formats.} \item{...}{Additional arguments passed to \code{enrichplot::barplot.enrichResult}.} } diff --git a/man/scBarplotStackedMetaCateg_List.Rd b/man/scBarplotStackedMetaCateg_List.Rd index 5b0330b..427eb01 100644 --- a/man/scBarplotStackedMetaCateg_List.Rd +++ b/man/scBarplotStackedMetaCateg_List.Rd @@ -10,6 +10,8 @@ scBarplotStackedMetaCateg_List(ls.obj, meta.col, ...) \item{ls.obj}{List of Seurat objects.} \item{meta.col}{The metadata column name to be used for the barplot.} + +\item{...}{Additional arguments passed to \code{ggExpress::qbarplot.df}.} } \value{ A ggplot object representing the stacked barplot. diff --git a/man/scEnhancedVolcano.Rd b/man/scEnhancedVolcano.Rd index 3a11892..d0736cc 100644 --- a/man/scEnhancedVolcano.Rd +++ b/man/scEnhancedVolcano.Rd @@ -9,10 +9,7 @@ scEnhancedVolcano( x = "avg_log2FC", y = "p_val_adj", lab = rownames(toptable), - suffix = NULL, title = paste("DGEA"), - caption = paste("Min. Fold Change in Input:", .estMinimumFC(toptable)), - caption2 = paste("min p_adj:", min.p, "(Y-axis values clipped at)"), selectLab = trail(lab, 10), min.p = 1e-50, max.l2fc = Inf, @@ -20,8 +17,11 @@ scEnhancedVolcano( pCutoffCol = "p_val_adj", pCutoff = 0.001, FCcutoff = 1, + suffix = NULL, + caption = paste("Min. Fold Change in Input:", .estMinimumFC(toptable)), + caption2 = paste("min p_adj:", min.p, "(Y-axis values clipped at)"), count_stats = TRUE, - drawConnectors = T, + drawConnectors = TRUE, max.overlaps = Inf, h = 9, w = h, @@ -37,14 +37,14 @@ scEnhancedVolcano( \item{lab}{A vector of gene symbols to label on the plot.} -\item{suffix}{A string to append to the filename/title of the plot.} - \item{title}{The title of the plot.} \item{selectLab}{A vector of gene symbols to select for labeling.} \item{min.p}{The minimum p-value, to trim high values on the Y-axis.} +\item{max.l2fc}{The maximum log2 fold change, to trim high values on the X-axis.} + \item{min.pct.cells}{The minimum percentage of cells in which a gene must be expressed to be included in the plot.} \item{pCutoffCol}{The column in the toptable that contains the p-value cutoff.} @@ -53,6 +53,12 @@ scEnhancedVolcano( \item{FCcutoff}{The fold change cutoff.} +\item{suffix}{A string to append to the filename/title of the plot.} + +\item{caption}{The first line of caption of the plot.} + +\item{caption2}{The second line of caption of the plot.} + \item{count_stats}{Logical. Calculates a data frame with the count statistics.} \item{drawConnectors}{Whether to draw connectors between the labels and the points.} @@ -63,9 +69,7 @@ scEnhancedVolcano( \item{w}{The width of the plot.} -\item{...}{Pass any other parameter to the internally called functions (most of them should work).} - -\item{subtitle}{The subtitle of the plot.} +\item{...}{Pass any other parameter to \code{EnhancedVolcano::EnhancedVolcano()}.} } \value{ A ggplot object. diff --git a/man/scGOEnrichment.Rd b/man/scGOEnrichment.Rd index db83102..7cfe383 100644 --- a/man/scGOEnrichment.Rd +++ b/man/scGOEnrichment.Rd @@ -36,11 +36,13 @@ scGOEnrichment( \item{qvalueCutoff}{Numeric. Q-value cutoff for significance. Default: 0.2.} -\item{save}{Logical. Save the results as a data frame. Default: TRUE.} +\item{save}{Logical. Save the results as a data frame. Default: \code{TRUE}.} \item{suffix}{Character. Suffix to append to the output file name. Default: 'GO.Enrichments'.} -\item{check.gene.symbols}{Logical. Check gene symbols for validity. Default: TRUE.} +\item{check.gene.symbols}{Logical. Check gene symbols for validity. Default: \code{TRUE}.} + +\item{...}{Additional arguments to pass to \code{clusterProfiler::enrichGO}.} } \value{ A data frame with GO enrichment results. diff --git a/man/scPieClusterDistribution.Rd b/man/scPieClusterDistribution.Rd index aec11e3..314e5ad 100644 --- a/man/scPieClusterDistribution.Rd +++ b/man/scPieClusterDistribution.Rd @@ -27,6 +27,6 @@ identity in a single-cell RNA-seq object. } \examples{ \dontrun{ -scPieClusterDistribution(obj = combined.obj, ident = 'cluster_identity') +scPieClusterDistribution(obj = combined.obj, ident = "cluster_identity") } } diff --git a/man/scPlotPCAvarExplained.Rd b/man/scPlotPCAvarExplained.Rd index 097d824..b2ec107 100644 --- a/man/scPlotPCAvarExplained.Rd +++ b/man/scPlotPCAvarExplained.Rd @@ -8,8 +8,8 @@ scPlotPCAvarExplained( obj = combined.obj, plotname = "Variance Explained by Principal Components", sub = paste(ncol(obj), "cells, ", nrow(obj), "features."), - use.MarkdownReports = FALSE, caption = "hline at 1\%", + use.MarkdownReports = FALSE, ... ) } @@ -24,8 +24,12 @@ Components".} features analyzed. Default: A string generated from \code{obj} stating the number of cells and features.} +\item{caption}{A caption for the plot. Default: "hline at 1\%".} + \item{use.MarkdownReports}{Boolean indicating whether to use \code{MarkdownReports} for plotting. If \code{FALSE}, \code{ggExpress} is used. Default: \code{FALSE}.} + +\item{...}{Additional arguments to be passed to \code{ggExpress::qbarplot} or \code{MarkdownReports::wbarplot}.} } \value{ Generates a plot showing the percent of variation each PC accounts for. This function diff --git a/man/set.mm.Rd b/man/set.mm.Rd index 29f9f2d..ed336a1 100644 --- a/man/set.mm.Rd +++ b/man/set.mm.Rd @@ -7,7 +7,7 @@ set.mm(obj = combined.obj) } \arguments{ -\item{obj}{Seurat object, Default: combined.obj} +\item{obj}{Seurat object, Default: \code{combined.obj}} } \description{ Helps to find metadata columns. It creates a list with the names of of 'obj@meta.data'. diff --git a/man/showMiscSlots.Rd b/man/showMiscSlots.Rd index 12b7c13..cffb567 100644 --- a/man/showMiscSlots.Rd +++ b/man/showMiscSlots.Rd @@ -7,7 +7,7 @@ showMiscSlots(obj = combined.obj, max.level = 1, subslot = NULL, ...) } \arguments{ -\item{obj}{An object whose \verb{@misc} slot needs to be examined. Default: combined.obj} +\item{obj}{An object whose \verb{@misc} slot needs to be examined. Default: \code{combined.obj}} \item{max.level}{Max depth to dive into sub-elements.} diff --git a/man/showToolsSlots.Rd b/man/showToolsSlots.Rd index 341a9ad..84a8011 100644 --- a/man/showToolsSlots.Rd +++ b/man/showToolsSlots.Rd @@ -8,6 +8,12 @@ showToolsSlots(obj, max.level = 1, subslot = NULL, ...) } \arguments{ \item{obj}{An object whose \verb{@tools} slot needs to be examined.} + +\item{max.level}{The maximum level of nesting to print.} + +\item{subslot}{The name of a sub-slot within the \verb{@tools} slot to examine.} + +\item{...}{Additional arguments to be passed to \code{str}.} } \description{ \code{showToolsSlots} prints the names of slots in the \verb{@tools} of a given object. diff --git a/man/suPlotVariableFeatures.Rd b/man/suPlotVariableFeatures.Rd index 63edf72..a4341fa 100644 --- a/man/suPlotVariableFeatures.Rd +++ b/man/suPlotVariableFeatures.Rd @@ -21,17 +21,19 @@ suPlotVariableFeatures( \item{NrVarGenes}{A vector containing the top 20 variable genes for the Seurat object.} -\item{repel}{A logical value indicating whether to repel the labels to avoid overlap. Default: TRUE.} +\item{repel}{A logical value indicating whether to repel the labels to avoid overlap. Default: \code{TRUE}.} \item{plotWidth}{Numeric value specifying the width of the plot when saved. Default: 7.} \item{plotHeight}{Numeric value specifying the height of the plot when saved. Default: 5.} -\item{sampleName}{A string specifying the sample name, used to generate the filename for saving -the plot.} +\item{save}{A logical value indicating whether to save the plot to a PDF file. Default: \code{TRUE}.} -\item{ppp}{A function for constructing the path and filename for saving the plot. It takes three -arguments: a prefix for the filename, a sample name, and the file extension ('pdf').} +\item{assay}{The assay to use for the plot. Default: DefaultAssay(obj).} + +\item{suffix}{A string suffix to append to the plot filename. Default: NULL.} + +\item{...}{Additional arguments to pass to the Seurat::VariableFeaturePlot function.} } \description{ Generates a Variable Feature Plot for a specified Seurat object, labels points with diff --git a/man/transferLabelsSeurat.Rd b/man/transferLabelsSeurat.Rd index 07300e7..381c7f6 100644 --- a/man/transferLabelsSeurat.Rd +++ b/man/transferLabelsSeurat.Rd @@ -25,27 +25,41 @@ transferLabelsSeurat( \arguments{ \item{query_obj}{A Seurat object for which the labels are to be transferred.} -\item{reference_obj}{Alternative to \code{reference_path}. If provided, the path is not used to load the reference data.} +\item{reference_obj}{Alternative to \code{reference_path}. If provided, the path is not used to load +the reference data.} -\item{reference_path}{A character string indicating the file path to the reference Seurat object. The path must exist.} +\item{reference_path}{A character string indicating the file path to the reference Seurat object. +The path must exist.} -\item{new_ident}{A character string specifying the name of the new identity class to be created in the query Seurat object. Default is obtained by replacing 'ordered' with 'transferred' in named_ident.} +\item{reference_ident}{A character string specifying the name of the identity class to be used +from the reference Seurat object. Default is 'RNA_snn_res.0.3.ordered.ManualNames'.} -\item{predictions_col}{A character string specifying the column in the metadata of the transferred Seurat object containing the transferred labels. Default is 'predicted.id'.} +\item{anchors}{A list of anchors obtained from the FindTransferAnchors function. If NULL, the} -\item{save_anchors}{save anchors as RDS file.} +\item{new_ident}{A character string specifying the name of the new identity class to be +created in the query Seurat object. Default is obtained by replacing 'ordered' with +'transferred' in reference_ident.} -\item{plot_suffix}{A string to added to the UMAP with the new identity.} +\item{predictions_col}{A character string specifying the column in the metadata of the transferred +Seurat object containing the transferred labels. Default is 'predicted.id'.} -\item{w}{Width for the saved image. Default: 9} +\item{predictions_score}{A character string specifying the column in the metadata of the transferred +Seurat object containing the scores of the transferred labels. Default is 'transferred.score'.} -\item{h}{Height for the saved image. Default: 12} +\item{save_anchors}{A logical indicating whether to save the anchors as an RDS file. Default is TRUE.} -\item{...}{Additional arguments passed to the Seurat.utils::clUMAP function.} +\item{reference_suffix}{A character string to be used as in the subtitle of the reference UMAP plot. +Default is 'REFERENCE.obj'.} -\item{named_ident}{A character string specifying the name of the identity class to be used from the reference Seurat object. Default is 'RNA_snn_res.0.3.ordered.ManualNames'.} +\item{plot_suffix}{A character string to be added to the UMAP with the new identity. Default is NULL.} -\item{suffix}{A character string to be used as a suffix in the visualization. Default is 'NEW'.} +\item{plot_reference}{A logical indicating whether to plot the reference UMAP. Default is TRUE.} + +\item{w}{Width for the saved image. Default: \code{9}} + +\item{h}{Height for the saved image. Default: \code{12}} + +\item{...}{Additional arguments passed to the \code{Seurat.utils::clUMAP} function.} } \value{ The modified query Seurat object with the transferred labels as a new identity class. @@ -56,7 +70,7 @@ using anchoring and transfer data methods from the Seurat package. It then visua reference and the combined objects using Uniform Manifold Approximation and Projection (UMAP). } \examples{ -# combined.objX <- transferLabelsSeurat(named_ident = 'RNA_snn_res.0.3.ordered.ManualNames', +# combined.objX <- transferLabelsSeurat(reference_ident = 'RNA_snn_res.0.3.ordered.ManualNames', # reference_obj = reference_obj, # query_obj = combined.obj) diff --git a/man/umapHiLightSel.Rd b/man/umapHiLightSel.Rd index 4f7d518..c252b86 100644 --- a/man/umapHiLightSel.Rd +++ b/man/umapHiLightSel.Rd @@ -10,7 +10,7 @@ umapHiLightSel( ident = GetClusteringRuns()[1], h = 7, w = 5, - show_plot = T, + show_plot = TRUE, ... ) } @@ -21,7 +21,16 @@ umapHiLightSel( Default: \code{c("0", "2", "4")}.} \item{ident}{Name of the metadata column containing cluster IDs; -Default: 'integrated_snn_res.0.3'.} +Default: \code{GetClusteringRuns()[1]}.} + +\item{h}{Height of the plot; Default: \code{7}.} + +\item{w}{Width of the plot; Default: \code{5}.} + +\item{show_plot}{Logical; if \code{TRUE}, the plot will be displayed in the RStudio viewer; +Default: \code{TRUE}.} + +\item{...}{Additional arguments to be passed to the \code{DimPlot} function.#'} } \value{ Saves a UMAP plot highlighting specified clusters to the current working directory. diff --git a/man/writeCombinedMetadataToTsvFromLsObj.Rd b/man/writeCombinedMetadataToTsvFromLsObj.Rd index 36b5d50..1b7c8ba 100644 --- a/man/writeCombinedMetadataToTsvFromLsObj.Rd +++ b/man/writeCombinedMetadataToTsvFromLsObj.Rd @@ -17,6 +17,12 @@ writeCombinedMetadataToTsvFromLsObj( \item{cols.remove}{A character vector of column names to be removed from each metadata data frame. Default is an empty character vector, meaning no columns will be removed.} + +\item{save_as_qs}{A logical indicating whether to save the merged metadata as a .qs object.} + +\item{save_as_tsv}{A logical indicating whether to save the merged metadata as a .tsv file.} + +\item{...}{Additional arguments to be passed to \code{write.table}.} } \value{ A large data frame that is the row-wise merge of all \verb{@meta.data} data frames.