Skip to content

Commit

Permalink
DGEA naming
Browse files Browse the repository at this point in the history
  • Loading branch information
vertesy committed Sep 25, 2024
1 parent af711d1 commit a78a650
Show file tree
Hide file tree
Showing 9 changed files with 82 additions and 52 deletions.
85 changes: 50 additions & 35 deletions R/Seurat.Utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,9 @@ processSeuratObject <- function(obj, param.list = p, add.meta.fractions = FALSE,
#' clustering results. Default: TRUE.
#' @param clean.meta.data Logical indicating whether to clean the metadata slots of
#' previous clustering results. Default: TRUE.
#' @param n.cores Integer specifying the number of cores to use for parallel processing (multisession).
#' Default: 1.
#' @param presto Logical indicating whether to use presto for DE analysis. Default: TRUE.
#'
#' @return Modified Seurat object and markers list.
#' @examples
Expand All @@ -219,9 +222,9 @@ runDGEA <- function(obj,
reorder.dimension = 1,
# ordering = if(any(!testNumericCompatible(res.analyzed.DE))) "no" else "ordered", # param.list$"cl.annotation"
# ordering = "ordered", # param.list$"cl.annotation"
directory = OutDir,
directory,
dir_suffix,
subdirectory = ppp("DGEA_res", idate("%Y.%m.%d_%H.%M")),
subdirectory = ppp("DGEA_res", idate()),
add.combined.score = TRUE,
save.obj = TRUE,
calculate.DGEA = TRUE,
Expand All @@ -231,8 +234,11 @@ runDGEA <- function(obj,
auto.cluster.naming = TRUE,
clean.misc.slot = TRUE,
clean.meta.data = TRUE,
...) {
n.cores = 1,
presto = TRUE
) {

if(presto) require(presto)
# Assertions for input parameters
stopifnot(
is(obj, "Seurat"),
Expand All @@ -243,6 +249,7 @@ runDGEA <- function(obj,
)

create_set_OutDir(directory, subdirectory, newName = "dir_DGEA")
dir_DGEA <- OutDir

# Log utilized parameters from param.list
{
Expand Down Expand Up @@ -310,15 +317,19 @@ runDGEA <- function(obj,
ident
} else {
if (reorder.clusters) {
GetOrderedClusteringRuns(res = res, obj = obj)[1]
GetOrderedClusteringRuns(res = res.analyzed.DE[i], obj = obj)[1]
} else {
GetClusteringRuns(res = res, obj = obj)[1]
GetClusteringRuns(res = res.analyzed.DE[i], obj = obj)[1]
}
} # end if is.null(ident)
stopifnot(Idents.for.DEG[[i]] %in% names(obj@meta.data))
} # end for loop



# Loop through each resolution setting to find markers ________________________________________
if (n.cores>1) plan("multisession", workers = n.cores)

if (calculate.DGEA) {
message("Calclulating ----------------------------------------")
for (i in 1:length(res.analyzed.DE)) {
Expand All @@ -336,7 +347,6 @@ runDGEA <- function(obj,
tic(); df.markers <- Seurat::FindAllMarkers(obj,
verbose = TRUE,
test.use = param.list$"test",

logfc.threshold = param.list$"logfc.threshold",
return.thresh = param.list$"return.thresh",
min.pct = param.list$"min.pct",
Expand All @@ -349,6 +359,7 @@ runDGEA <- function(obj,


Stringendo::stopif(is.null(df.markers))

# order df.markers by logFC
df.markers <- df.markers[order(df.markers$"avg_log2FC", decreasing = TRUE), ]

Expand Down Expand Up @@ -405,9 +416,9 @@ runDGEA <- function(obj,
message('Automatic cluster labeling by top gene.')

obj <- StoreAllMarkers(df_markers = df.markers, res = res, obj = obj)
obj <- AutoLabelTop.logFC(group.by = Idents.for.DEG[[i]], res = res, obj = obj)
obj <- AutoLabelTop.logFC(group.by = Idents.for.DEG[[i]], obj = obj, plot.top.genes = FALSE) # already plotted above

clUMAP(ident = ppp("cl.names.top.gene.res", res), obj = obj)
clUMAP(ident = ppp("cl.names.top.gene", Idents.for.DEG[[i]]), obj = obj, caption = "")
} # end if auto.cluster.naming

# Plot per-cluster gene enrichment histogram ________________________________________
Expand Down Expand Up @@ -869,15 +880,15 @@ showToolsSlots <- function(obj, max.level = 1, subslot = NULL, ...) {
#' @description See `showToolsSlots` for details. Prints the names of slots in the `@misc` of a given object.
#' It specifically targets list elements, skipping over data frames and other non-list objects.
#'
#' @param obj An object whose `@misc` slot needs to be examined.
#' @param obj An object whose `@misc` slot needs to be examined. Default: combined.obj
#' @param max.level Max depth to dive into sub-elements.
#' @param subslot A subslot within `@misc`.
#' @param ... ...
#'
#' @examples showToolsSlots(obj)
#'
#' @export
showMiscSlots <- function(obj, max.level = 1, subslot = NULL,
showMiscSlots <- function(obj = combined.obj, max.level = 1, subslot = NULL,
...) {
slotX <- if (is.null(subslot)) obj@misc else obj@misc[[subslot]]
str(slotX, max.level = max.level, ...)
Expand Down Expand Up @@ -999,6 +1010,9 @@ calc.q99.Expression.and.set.all.genes <- function(
#' @param genes A character vector of gene symbols.
#' @param pattern_NC A character vector of patterns to filter out non-coding gene symbols.
#' Default: c("^AC.", "^AL.", "^c[1-9]orf", "\\.AS[1-9]$").
#' @param v "verbose" Whether to print the number of genes before and after filtering.
#' @param unique Whether to return unique gene symbols. Default: TRUE.
#' @param ... Additional arguments to pass to \code{\link[stringr]{str_detect}}.
#'
#' @return A character vector of filtered gene symbols.
#'
Expand All @@ -1014,7 +1028,7 @@ filterNcGenes <- function(genes, pattern_NC = c("^A[CFLP][0-9]{6}", "^Z[0-9]{5}"
"^LINC0[0-9]{4}", "^C[1-9]+orf[1-9]+",
"[-|\\.]AS[1-9]*$", "[-|\\.]DT[1-9]*$",
"^MIR[1-9]", "^SNHG[1-9]"),
unique = TRUE, ...) {
v = TRUE, unique = TRUE, ...) {

# Input assertions
stopifnot(is.character(genes), length(genes) > 0,
Expand All @@ -1029,13 +1043,15 @@ filterNcGenes <- function(genes, pattern_NC = c("^A[CFLP][0-9]{6}", "^Z[0-9]{5}"
genes_kept <- genes[stringr::str_detect(genes, combined_pattern, negate = TRUE)]

# Report original and final list sizes and percentage remaining
original_length <- length(genes)
filtered_length <- length(genes_kept)
percentage_remaining <- (filtered_length / original_length) * 100
if(v) {
original_length <- length(genes)
filtered_length <- length(genes_kept)
percentage_remaining <- (filtered_length / original_length) * 100

message("Original number of gene symbols: ", original_length)
message("Filtered number of gene symbols: ", filtered_length)
message("Percentage remaining: ", round(percentage_remaining, 2), "%")
message("Original number of gene symbols: ", original_length)
message("Filtered number of gene symbols: ", filtered_length)
message("Percentage remaining: ", round(percentage_remaining, 2), "%")
}

# Output assertions
stopifnot(is.character(genes_kept), length(genes_kept) <= original_length)
Expand Down Expand Up @@ -2111,7 +2127,7 @@ downsampleSeuObjByIdentAndMaxcells <- function(obj,


# _________________________________________________________________________________________________
#' @title Remove Residual Small Clusters from Seurat Object
#' @title Remove Residual Small Clusters from a Seurat Object
#'
#' @description Removes clusters containing fewer cells than specified by `max.cells`
#' from a Seurat object. This function is particularly useful after subsetting a dataset,
Expand All @@ -2137,6 +2153,7 @@ removeResidualSmallClusters <- function(
identitites = GetClusteringRuns(obj, pat = "*snn_res.[0-9].[0-9]$")[1:5],
max.cells = max(round((ncol(obj)) / 2000), 5),
plot.removed = TRUE) {
#
META <- obj@meta.data
all.cells <- rownames(META)

Expand Down Expand Up @@ -2227,7 +2244,7 @@ dropLevelsSeurat <- function(obj = combined.obj, verbose = TRUE, also.character


# ____________________________________________________________________
#' @title Remove Clusters and Drop Levels
#' @title Remove Clusters and Drop Levels from a List of Seurat Objects
#'
#' @description This function removes residual small clusters from specified Seurat objects and
#' drops levels in factor-like metadata.
Expand All @@ -2250,9 +2267,10 @@ dropLevelsSeurat <- function(obj = combined.obj, verbose = TRUE, also.character
#' }
#'
#' @export
removeClustersAndDropLevels <- function(
ls_obj, object_names = names(ls_obj),
indices = 2:3, ...) {
removeClustersAndDropLevels <- function(ls_obj,
object_names = names(ls_obj),
indices = 2:3, ...) {
#
for (index in indices) {
dataset_name <- object_names[index]
obj <- ls_obj[[dataset_name]]
Expand Down Expand Up @@ -2612,9 +2630,10 @@ GetTopMarkersDF <- function(
"^MIR[1-9]", "^SNHG[1-9]")
) {
"Works on active Idents() -> thus we call cluster"
combined_pattern <- paste(exclude, collapse = "|")

TopMarkers <- dfDE |>
dplyr::filter(!grepl(exclude, gene, perl = TRUE)) |>
dplyr::filter(!grepl(combined_pattern, gene, perl = TRUE)) |>
arrange(desc(!!as.name(order.by))) |>
dplyr::group_by(cluster) |>
dplyr::slice(1:n) |>
Expand Down Expand Up @@ -2652,18 +2671,11 @@ GetTopMarkers <- function(dfDE = df.markers,
order.by = c("combined.score", "avg_log2FC", "p_val_adj")[2]) {
message("Works on active Idents()") # thus we call cluster
TopMarkers <- dfDE |>
arrange(desc(!!as.name(order.by))) |>
group_by(cluster) |>
dplyr::arrange(desc(!!as.name(order.by))) |>
dplyr::group_by(cluster) |>
dplyr::slice(1:n) |>
dplyr::select(gene) |>
col2named.vec.tbl()

# TopMarkers <- dfDE |>
# arrange(desc(!!as.name(order.by))) |>
# group_by(cluster) |>
# dplyr::slice(1:n) |>
# dplyr::select(gene) |>
# col2named.vec.tbl()
CodeAndRoll2::col2named.vec.tbl()

return(TopMarkers)
}
Expand Down Expand Up @@ -2760,7 +2772,7 @@ AutoLabelTop.logFC <- function(

# Check if the clustering was ordered _____________________________________________________
sfx.ord <- ifelse(grepl("ordered", group.by), group.by, "")
namedIDslot <- sppp("cl.names.top.gene.", sfx.ord, "res", res)
namedIDslot <- sppp("cl.names.top.gene.", sfx.ord)

obj <- addMetaDataSafe(obj = obj, metadata = as.character(named.group.by), col.name = namedIDslot, overwrite = TRUE)
if (plot.top.genes) multiFeaturePlot.A4(list.of.genes = top.markers, suffix = suffix, obj = obj)
Expand Down Expand Up @@ -3323,11 +3335,13 @@ gene.name.check <- function(Seu.obj) {
#' @importFrom Stringendo percentage_formatter
#'
check.genes <- function(
list.of.genes = ClassicMarkers, makeuppercase = FALSE, verbose = TRUE, HGNC.lookup = FALSE,
list.of.genes = ClassicMarkers, makeuppercase = FALSE, HGNC.lookup = FALSE,
obj = combined.obj,
assay.slot = c("RNA", "integrated")[1],
data.slot = c("counts", "data")[2],
verbose = TRUE,
...) {
tictoc::tic()
message(" > Running check.genes...")
message("assay: ", assay.slot, ", data.slot: ", data.slot)

Expand All @@ -3350,6 +3364,7 @@ check.genes <- function(
}
}
}
tictoc::toc()
intersect(list.of.genes, all_genes)
}

Expand Down
10 changes: 6 additions & 4 deletions R/Seurat.Utils.Visualization.R
Original file line number Diff line number Diff line change
Expand Up @@ -2473,6 +2473,7 @@ DimPlot.ClusterNames <- function(
#' @description Save multiple FeaturePlots, as jpeg, on A4 for each gene, which are stored as a list of gene names.
#' @param list.of.genes List of gene names for which the plots are to be generated. No default.
#' @param obj Seurat object, Default: combined.obj
#' @param subdir Should plots be saved in a sub-directory? Default: TRUE
#' @param foldername Folder name to save the generated plots. Default: The name of the list of genes.
#' @param plot.reduction Dimension reduction technique to use for plots. Default: 'umap'
#' @param intersectionAssay The assay to intersect with, either 'RNA' or 'integrated'. Default: 'RNA'
Expand All @@ -2483,7 +2484,6 @@ DimPlot.ClusterNames <- function(
#' @param cex Point size in the plot. Default: round(0.1/(nr.Col * nr.Row), digits = 2)
#' @param gene.min.exp Minimum gene expression level for plotting. Default: 'q01'
#' @param gene.max.exp Maximum gene expression level for plotting. Default: 'q99'
#' @param subdir Should plots be saved in a sub-directory? Default: TRUE
#' @param prefix Prefix for the plot filenames. Default: NULL
#' @param suffix Suffix for the plot filenames. Default: NULL
#' @param background_col Background color of the plots. Default: "white"
Expand All @@ -2505,15 +2505,17 @@ DimPlot.ClusterNames <- function(
multiFeaturePlot.A4 <- function(
list.of.genes,
obj = combined.obj,
foldername = substitute(list.of.genes), plot.reduction = "umap",
subdir = TRUE,
foldername = substitute(list.of.genes),
plot.reduction = "umap",
intersectionAssay = c("RNA", "integrated")[1],
layout = c("tall", "wide", FALSE)[2],
colors = c("grey", "red"),
nr.Col = 2, nr.Row = 4,
raster = if (ncol(obj) > 1e5) TRUE else FALSE,
cex = round(0.1 / (nr.Col * nr.Row), digits = 2),
cex.min = if (raster) TRUE else FALSE,
gene.min.exp = "q01", gene.max.exp = "q99", subdir = TRUE,
gene.min.exp = "q01", gene.max.exp = "q99",
prefix = NULL, suffix = NULL,
background_col = "white",
aspect.ratio = c(FALSE, 0.6)[2],
Expand Down Expand Up @@ -2946,7 +2948,7 @@ PlotTopGenesPerCluster <- function(
ls.topMarkers <- splitbyitsnames(topX.markers)
for (i in 1:length(ls.topMarkers)) {
multiFeaturePlot.A4(
list.of.genes = ls.topMarkers[[i]], obj = obj, subdir = FALSE,
list.of.genes = ls.topMarkers[[i]], obj = obj, subdir = T, foldername = ppp('TopGenes.umaps'),
prefix = ppp("DEG.markers.res", cl_res, "cluster", names(ls.topMarkers)[i])
)
}
Expand Down
6 changes: 3 additions & 3 deletions man/check.genes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions man/filterNcGenes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/multiFeaturePlot.A4.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/removeClustersAndDropLevels.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit a78a650

Please sign in to comment.