From 28eff2571fada6b8d114e105b88b9fb86a5c70d5 Mon Sep 17 00:00:00 2001 From: Stefano Monti Date: Sat, 9 Apr 2022 11:46:57 -0400 Subject: [PATCH 01/13] added leading edge functionalities --- R/ks_enrichment.R | 211 +++++++++++++++++++++++++--------------------- 1 file changed, 114 insertions(+), 97 deletions(-) diff --git a/R/ks_enrichment.R b/R/ks_enrichment.R index c5b48a1..f47216e 100644 --- a/R/ks_enrichment.R +++ b/R/ks_enrichment.R @@ -16,75 +16,83 @@ y, weights=NULL, weights.pwr=1, - absolute=FALSE, + absolute=FALSE, # this is not really implemented, should be removed plotting=FALSE, - plot.title="") { - - n.y <- length(y) - err = list(score=0, pval=1, plot=ggempty()) - if (n.y < 1 ) return(err) - if (any(y > n.x)) return(err) - if (any(y < 1)) return(err) - - x.axis <- y.axis <- NULL + plot.title="") +{ + n.y <- length(y) + err = list(score=0, pval=1, plot=ggempty()) + if (n.y < 1 ) return(err) + if (any(y > n.x)) return(err) + if (any(y < 1)) return(err) + + x.axis <- y.axis <- NULL + leading_edge <- NULL # recording the x corresponding to the highest y value + + # If weights are provided + if (!is.null(weights)) { + weights <- abs(weights[y])^weights.pwr - # If weights are provided - if (!is.null(weights)) { - weights <- abs(weights[y])^weights.pwr + Pmis <- rep(1, n.x); Pmis[y] <- 0; Pmis <- cumsum(Pmis); Pmis <- Pmis/(n.x-n.y) + Phit <- rep(0, n.x); Phit[y] <- weights; Phit <- cumsum(Phit); Phit <- Phit/Phit[n.x] + z <- Phit-Pmis - Pmis <- rep(1, n.x); Pmis[y] <- 0; Pmis <- cumsum(Pmis); Pmis <- Pmis/(n.x-n.y) - Phit <- rep(0, n.x); Phit[y] <- weights; Phit <- cumsum(Phit); Phit <- Phit/Phit[n.x] - z <- Phit-Pmis + score <- if (absolute) max(z)-min(z) else z[leading_edge <- which.max(abs(z))] - score <- if (absolute) max(z)-min(z) else z[which.max(abs(z))] - - x.axis <- 1:n.x - y.axis <- z + x.axis <- 1:n.x + y.axis <- z # Without weights - } else { - y <- sort(y) - n <- n.x*n.y/(n.x + n.y) - hit <- 1/n.y - mis <- 1/n.x - - Y <- sort(c(y-1, y)) - Y <- Y[diff(Y) != 0] - y.match <- match(y, Y) - D <- rep(0, length(Y)) - D[y.match] <- (1:n.y) - zero <- which(D == 0)[-1] - D[zero] <- D[zero-1] - - z <- D*hit-Y*mis - - score <- if (absolute) max(z)-min(z) else z[which.max(abs(z))] - - x.axis <- Y - y.axis <- z - - if (Y[1] > 0) { - x.axis <- c(0, x.axis) - y.axis <- c(0, y.axis) - } - if (max(Y) < n.x) { - x.axis <- c(x.axis, n.x) - y.axis <- c(y.axis, 0) - } - } + } else { + y <- sort(y) + n <- n.x*n.y/(n.x + n.y) + hit <- 1/n.y + mis <- 1/n.x + + Y <- sort(c(y-1, y)) # append the positions preceding hits + Y <- Y[diff(Y) != 0] # remove repeated position + y.match <- match(y, Y) # find the hits' positions + D <- rep(0, length(Y)) + D[y.match] <- (1:n.y) + zero <- which(D == 0)[-1] + D[zero] <- D[zero-1] - # One-sided Kolmogorov–Smirnov test - results <- suppressWarnings(ks.test(1:n.x, y, alternative="less")) - results$statistic <- score # Use the signed statistic - - # Enrichment plot - p <- if (plotting) ggeplot(n.x, y, x.axis, y.axis, plot.title) else ggempty() - - return(list(score=as.numeric(results$statistic), - pval=results$p.value, - plot=p)) + z <- D*hit-Y*mis + + score <- if (absolute) max(z)-min(z) else z[leading_edge <- which.max(abs(z))] + + x.axis <- Y + y.axis <- z + + if (Y[1] > 0) { + x.axis <- c(0, x.axis) + y.axis <- c(0, y.axis) + } + if (max(Y) < n.x) { + x.axis <- c(x.axis, n.x) + y.axis <- c(y.axis, 0) + } + } + leading_edge <- x.axis[leading_edge] + leading_hits <- intersect(x.axis[x.axis<=leading_edge],y) + + # One-sided Kolmogorov–Smirnov test + results <- suppressWarnings(ks.test(1:n.x, y, alternative="less")) + results$statistic <- score # Use the signed statistic + + # Enrichment plot + p <- if (plotting) { + ggeplot(n.x, y, x.axis, y.axis, plot.title) + + geom_vline(xintercept=leading_edge, linetype="dotted", color="red", size=0.25) + } + else ggempty() + + return(list(score=as.numeric(results$statistic), + pval=results$p.value, + leading_edge=leading_edge, + leading_hits=leading_hits, + plot=p)) } - #' Enrichment test via one-sided Kolmogorov–Smirnov test #' #' @param signature A vector of ranked symbols @@ -101,43 +109,52 @@ weights=NULL, weights.pwr=1, absolute=FALSE, - plotting=TRUE) { + plotting=TRUE) +{ + if (!is(genesets, "list")) stop("Error: Expected genesets to be a list of gene sets\n") + if (!is.null(weights)) stopifnot(length(signature) == length(weights)) + + signature <- unique(signature) + genesets <- lapply(genesets, unique) - if (!is(genesets, "list")) stop("Error: Expected genesets to be a list of gene sets\n") - if (!is.null(weights)) stopifnot(length(signature) == length(weights)) + results <- mapply(function(geneset, title) { - signature <- unique(signature) - genesets <- lapply(genesets, unique) - - results <- mapply(function(geneset, title) { - - ranks <- match(geneset, signature) - ranks <- ranks[!is.na(ranks)] - - # Run ks-test - results <- .kstest(n.x=length(signature), - y=ranks, - weights=weights, - weights.pwr=weights.pwr, - absolute=absolute, - plotting=plotting, - plot.title=title) - - results[['geneset']] <- length(geneset) - results[['overlap']] <- length(ranks) - return(results) - - }, genesets, names(genesets), USE.NAMES=TRUE, SIMPLIFY=FALSE) - - results <- do.call(rbind, results) - data <- data.frame(apply(results[,c("score", "pval", "geneset", "overlap")], 2, unlist), stringsAsFactors=FALSE) - data$score <- signif(data$score, 2) - data$pval <- signif(data$pval, 2) - data$fdr <- signif(p.adjust(data$pval, method="fdr"), 2) - data$label <- names(genesets) - data$signature <- length(signature) - data <- data[,c("label", "pval", "fdr", "signature", "geneset", "overlap", "score")] - plots <- results[,"plot"] + ranks <- match(geneset, signature) + ranks <- ranks[!is.na(ranks)] - return(list(data=data, plots=plots)) + ## Run ks-test + results <- .kstest(n.x=length(signature), + y=ranks, + weights=weights, + weights.pwr=weights.pwr, + absolute=absolute, + plotting=plotting, + plot.title=title) + + results[['geneset']] <- length(geneset) + results[['overlap']] <- length(results$leading_hits) + return(results) + + }, genesets, names(genesets), USE.NAMES=TRUE, SIMPLIFY=FALSE) + + results <- do.call(rbind, results) + data <- data.frame(apply(results[,c("score", "pval", "geneset", "overlap")], 2, unlist), + stringsAsFactors = FALSE) + ## add list of genes in the leading edge + data <- data %>% + dplyr::mutate(hits=sapply(results[,"leading_hits"],function(x) paste(signature[x], collapse=','))) + data$score <- signif(data$score, 2) + data$pval <- signif(data$pval, 2) + data$label <- names(genesets) + data$signature <- length(signature) + data$fdr <- signif(p.adjust(data$pval, method="fdr"), 2) + data <- data %>% + dplyr::relocate(fdr,.after=pval) %>% + dplyr::relocate(signature,.after=geneset) %>% + dplyr::relocate(label) + plots <- results[,"plot"] + + return(list(data=data, + plots=plots, + leading_hits=sapply(results[,"leading_hits"],function(x) signature[x]))) } From bdb2531ff292658bd3d968a464842197b10cc887 Mon Sep 17 00:00:00 2001 From: Stefano Monti Date: Mon, 21 Nov 2022 19:50:19 -0500 Subject: [PATCH 02/13] minor fix in hypeR with ks.test --- R/hype.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/hype.R b/R/hype.R index 956d72b..bddcdd1 100644 --- a/R/hype.R +++ b/R/hype.R @@ -117,8 +117,8 @@ hypeR <- function(signature, weights <- NULL signature.type <- "ranked" } - data <- data.frame(matrix(ncol=7, nrow=0)) - colnames(data) <- c("label", "pval", "fdr", "signature", "geneset", "overlap", "score") + data <- data.frame(matrix(ncol=8, nrow=0)) + colnames(data) <- c("label", "score", "pval", "fdr", "geneset", "signature", "overlap", "hits") results <- .ks_enrichment(signature=signature, genesets=gsets.obj$genesets, weights=weights, From b60aae4139ec3db3296ccad3c5d6661c934745ae Mon Sep 17 00:00:00 2001 From: Stefano Monti Date: Sat, 9 Apr 2022 11:46:57 -0400 Subject: [PATCH 03/13] added leading edge functionalities --- R/ks_enrichment.R | 211 +++++++++++++++++++++++++--------------------- 1 file changed, 114 insertions(+), 97 deletions(-) diff --git a/R/ks_enrichment.R b/R/ks_enrichment.R index c5b48a1..f47216e 100644 --- a/R/ks_enrichment.R +++ b/R/ks_enrichment.R @@ -16,75 +16,83 @@ y, weights=NULL, weights.pwr=1, - absolute=FALSE, + absolute=FALSE, # this is not really implemented, should be removed plotting=FALSE, - plot.title="") { - - n.y <- length(y) - err = list(score=0, pval=1, plot=ggempty()) - if (n.y < 1 ) return(err) - if (any(y > n.x)) return(err) - if (any(y < 1)) return(err) - - x.axis <- y.axis <- NULL + plot.title="") +{ + n.y <- length(y) + err = list(score=0, pval=1, plot=ggempty()) + if (n.y < 1 ) return(err) + if (any(y > n.x)) return(err) + if (any(y < 1)) return(err) + + x.axis <- y.axis <- NULL + leading_edge <- NULL # recording the x corresponding to the highest y value + + # If weights are provided + if (!is.null(weights)) { + weights <- abs(weights[y])^weights.pwr - # If weights are provided - if (!is.null(weights)) { - weights <- abs(weights[y])^weights.pwr + Pmis <- rep(1, n.x); Pmis[y] <- 0; Pmis <- cumsum(Pmis); Pmis <- Pmis/(n.x-n.y) + Phit <- rep(0, n.x); Phit[y] <- weights; Phit <- cumsum(Phit); Phit <- Phit/Phit[n.x] + z <- Phit-Pmis - Pmis <- rep(1, n.x); Pmis[y] <- 0; Pmis <- cumsum(Pmis); Pmis <- Pmis/(n.x-n.y) - Phit <- rep(0, n.x); Phit[y] <- weights; Phit <- cumsum(Phit); Phit <- Phit/Phit[n.x] - z <- Phit-Pmis + score <- if (absolute) max(z)-min(z) else z[leading_edge <- which.max(abs(z))] - score <- if (absolute) max(z)-min(z) else z[which.max(abs(z))] - - x.axis <- 1:n.x - y.axis <- z + x.axis <- 1:n.x + y.axis <- z # Without weights - } else { - y <- sort(y) - n <- n.x*n.y/(n.x + n.y) - hit <- 1/n.y - mis <- 1/n.x - - Y <- sort(c(y-1, y)) - Y <- Y[diff(Y) != 0] - y.match <- match(y, Y) - D <- rep(0, length(Y)) - D[y.match] <- (1:n.y) - zero <- which(D == 0)[-1] - D[zero] <- D[zero-1] - - z <- D*hit-Y*mis - - score <- if (absolute) max(z)-min(z) else z[which.max(abs(z))] - - x.axis <- Y - y.axis <- z - - if (Y[1] > 0) { - x.axis <- c(0, x.axis) - y.axis <- c(0, y.axis) - } - if (max(Y) < n.x) { - x.axis <- c(x.axis, n.x) - y.axis <- c(y.axis, 0) - } - } + } else { + y <- sort(y) + n <- n.x*n.y/(n.x + n.y) + hit <- 1/n.y + mis <- 1/n.x + + Y <- sort(c(y-1, y)) # append the positions preceding hits + Y <- Y[diff(Y) != 0] # remove repeated position + y.match <- match(y, Y) # find the hits' positions + D <- rep(0, length(Y)) + D[y.match] <- (1:n.y) + zero <- which(D == 0)[-1] + D[zero] <- D[zero-1] - # One-sided Kolmogorov–Smirnov test - results <- suppressWarnings(ks.test(1:n.x, y, alternative="less")) - results$statistic <- score # Use the signed statistic - - # Enrichment plot - p <- if (plotting) ggeplot(n.x, y, x.axis, y.axis, plot.title) else ggempty() - - return(list(score=as.numeric(results$statistic), - pval=results$p.value, - plot=p)) + z <- D*hit-Y*mis + + score <- if (absolute) max(z)-min(z) else z[leading_edge <- which.max(abs(z))] + + x.axis <- Y + y.axis <- z + + if (Y[1] > 0) { + x.axis <- c(0, x.axis) + y.axis <- c(0, y.axis) + } + if (max(Y) < n.x) { + x.axis <- c(x.axis, n.x) + y.axis <- c(y.axis, 0) + } + } + leading_edge <- x.axis[leading_edge] + leading_hits <- intersect(x.axis[x.axis<=leading_edge],y) + + # One-sided Kolmogorov–Smirnov test + results <- suppressWarnings(ks.test(1:n.x, y, alternative="less")) + results$statistic <- score # Use the signed statistic + + # Enrichment plot + p <- if (plotting) { + ggeplot(n.x, y, x.axis, y.axis, plot.title) + + geom_vline(xintercept=leading_edge, linetype="dotted", color="red", size=0.25) + } + else ggempty() + + return(list(score=as.numeric(results$statistic), + pval=results$p.value, + leading_edge=leading_edge, + leading_hits=leading_hits, + plot=p)) } - #' Enrichment test via one-sided Kolmogorov–Smirnov test #' #' @param signature A vector of ranked symbols @@ -101,43 +109,52 @@ weights=NULL, weights.pwr=1, absolute=FALSE, - plotting=TRUE) { + plotting=TRUE) +{ + if (!is(genesets, "list")) stop("Error: Expected genesets to be a list of gene sets\n") + if (!is.null(weights)) stopifnot(length(signature) == length(weights)) + + signature <- unique(signature) + genesets <- lapply(genesets, unique) - if (!is(genesets, "list")) stop("Error: Expected genesets to be a list of gene sets\n") - if (!is.null(weights)) stopifnot(length(signature) == length(weights)) + results <- mapply(function(geneset, title) { - signature <- unique(signature) - genesets <- lapply(genesets, unique) - - results <- mapply(function(geneset, title) { - - ranks <- match(geneset, signature) - ranks <- ranks[!is.na(ranks)] - - # Run ks-test - results <- .kstest(n.x=length(signature), - y=ranks, - weights=weights, - weights.pwr=weights.pwr, - absolute=absolute, - plotting=plotting, - plot.title=title) - - results[['geneset']] <- length(geneset) - results[['overlap']] <- length(ranks) - return(results) - - }, genesets, names(genesets), USE.NAMES=TRUE, SIMPLIFY=FALSE) - - results <- do.call(rbind, results) - data <- data.frame(apply(results[,c("score", "pval", "geneset", "overlap")], 2, unlist), stringsAsFactors=FALSE) - data$score <- signif(data$score, 2) - data$pval <- signif(data$pval, 2) - data$fdr <- signif(p.adjust(data$pval, method="fdr"), 2) - data$label <- names(genesets) - data$signature <- length(signature) - data <- data[,c("label", "pval", "fdr", "signature", "geneset", "overlap", "score")] - plots <- results[,"plot"] + ranks <- match(geneset, signature) + ranks <- ranks[!is.na(ranks)] - return(list(data=data, plots=plots)) + ## Run ks-test + results <- .kstest(n.x=length(signature), + y=ranks, + weights=weights, + weights.pwr=weights.pwr, + absolute=absolute, + plotting=plotting, + plot.title=title) + + results[['geneset']] <- length(geneset) + results[['overlap']] <- length(results$leading_hits) + return(results) + + }, genesets, names(genesets), USE.NAMES=TRUE, SIMPLIFY=FALSE) + + results <- do.call(rbind, results) + data <- data.frame(apply(results[,c("score", "pval", "geneset", "overlap")], 2, unlist), + stringsAsFactors = FALSE) + ## add list of genes in the leading edge + data <- data %>% + dplyr::mutate(hits=sapply(results[,"leading_hits"],function(x) paste(signature[x], collapse=','))) + data$score <- signif(data$score, 2) + data$pval <- signif(data$pval, 2) + data$label <- names(genesets) + data$signature <- length(signature) + data$fdr <- signif(p.adjust(data$pval, method="fdr"), 2) + data <- data %>% + dplyr::relocate(fdr,.after=pval) %>% + dplyr::relocate(signature,.after=geneset) %>% + dplyr::relocate(label) + plots <- results[,"plot"] + + return(list(data=data, + plots=plots, + leading_hits=sapply(results[,"leading_hits"],function(x) signature[x]))) } From 0365b0a016915a96bebefecacc2a9b233ffa1d21 Mon Sep 17 00:00:00 2001 From: Stefano Monti Date: Mon, 21 Nov 2022 19:50:19 -0500 Subject: [PATCH 04/13] minor fix in hypeR with ks.test --- R/hype.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/hype.R b/R/hype.R index 956d72b..bddcdd1 100644 --- a/R/hype.R +++ b/R/hype.R @@ -117,8 +117,8 @@ hypeR <- function(signature, weights <- NULL signature.type <- "ranked" } - data <- data.frame(matrix(ncol=7, nrow=0)) - colnames(data) <- c("label", "pval", "fdr", "signature", "geneset", "overlap", "score") + data <- data.frame(matrix(ncol=8, nrow=0)) + colnames(data) <- c("label", "score", "pval", "fdr", "geneset", "signature", "overlap", "hits") results <- .ks_enrichment(signature=signature, genesets=gsets.obj$genesets, weights=weights, From 75ab801800022180b9abc2ae2595381e49982185 Mon Sep 17 00:00:00 2001 From: andrewdchen Date: Thu, 15 Jun 2023 15:12:45 -0400 Subject: [PATCH 05/13] Fixing bugs for leading edge edge cases --- R/ks_enrichment.R | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/R/ks_enrichment.R b/R/ks_enrichment.R index f47216e..2fb1a7b 100644 --- a/R/ks_enrichment.R +++ b/R/ks_enrichment.R @@ -21,7 +21,7 @@ plot.title="") { n.y <- length(y) - err = list(score=0, pval=1, plot=ggempty()) + err = list(score=0, pval=1, leading_edge=NULL, leading_hits=NA, plot=ggempty()) if (n.y < 1 ) return(err) if (any(y > n.x)) return(err) if (any(y < 1)) return(err) @@ -132,7 +132,18 @@ plot.title=title) results[['geneset']] <- length(geneset) - results[['overlap']] <- length(results$leading_hits) + edge_idx <- results[['leading_edge']] + + if(is.null(edge_idx)) { + results[['hits']] <- NA + results[['overlap']] <- 0 + } else if (!is.null(edge_idx) & edge_idx == 0) { + results[['hits']] <- NA + results[['overlap']] <- 0 + } else { + results[['hits']] <- paste0("'", signature[results[['leading_hits']]], "'", collapse=',') + results[['overlap']] <- edge_idx + } return(results) }, genesets, names(genesets), USE.NAMES=TRUE, SIMPLIFY=FALSE) @@ -141,8 +152,7 @@ data <- data.frame(apply(results[,c("score", "pval", "geneset", "overlap")], 2, unlist), stringsAsFactors = FALSE) ## add list of genes in the leading edge - data <- data %>% - dplyr::mutate(hits=sapply(results[,"leading_hits"],function(x) paste(signature[x], collapse=','))) + data$hits <- results[,"hits"] data$score <- signif(data$score, 2) data$pval <- signif(data$pval, 2) data$label <- names(genesets) @@ -153,8 +163,6 @@ dplyr::relocate(signature,.after=geneset) %>% dplyr::relocate(label) plots <- results[,"plot"] - return(list(data=data, - plots=plots, - leading_hits=sapply(results[,"leading_hits"],function(x) signature[x]))) + plots=plots)) } From 5e3b4095cd7ca973c8fa1194e160d5df6709fbf1 Mon Sep 17 00:00:00 2001 From: andrewdchen Date: Thu, 15 Jun 2023 15:13:03 -0400 Subject: [PATCH 06/13] Tests for ks leading edge feature --- tests/testthat/test-hype.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/testthat/test-hype.R b/tests/testthat/test-hype.R index 0dc5ec7..4f7db97 100644 --- a/tests/testthat/test-hype.R +++ b/tests/testthat/test-hype.R @@ -106,6 +106,29 @@ test_that("Hypergeometric is working", { expect_equal(filter(hyp_obj$data, label == "G3") %>% pull(pval), fisher(s, intersect(gs$G3, bg[1:18]), length(bg[1:18]))) }) +test_that("KS Test is working", { + + genesets <- msigdb_gsets("Homo sapiens", "C2", "CP:KEGG")$genesets[1:5] + all_genes <- genesets %>% unlist %>% unname + genesets_names <- names(genesets) + + experiment <- c(head(genesets[[1]], 5), LETTERS, tail(genesets[[1]], 1)) + + # Geneset 1 is top skewed + hyp_obj <- hypeR(experiment, genesets, background=2522, test="kstest") + expect_equal(hyp_obj$data[genesets_names[[1]], "hits"] %>% unlist(use.names=FALSE), paste0("'", genesets[[1]][1:5], "'",collapse=",")) + + # Geneset 2 is mixed + experiment <- c(head(genesets[[2]], 8), LETTERS, tail(genesets[[2]], 10)) + hyp_obj <- hypeR(experiment, genesets, background=2522, test="kstest") + expect_equal(hyp_obj$data[genesets_names[[2]], "hits"] %>% unlist(use.names=FALSE), paste0("'", genesets[[2]][1:8], "'",collapse=",")) + + # Geneset 3 is bottom skewed + experiment <- c(head(genesets[[3]], 1), LETTERS, tail(genesets[[3]], 8)) + hyp_obj <- hypeR(experiment, genesets, background=2522, test="kstest") + expect_equal(hyp_obj$data[genesets_names[[3]], "hits"] %>% unlist(use.names=FALSE), paste0("'", genesets[[3]][1], "'",collapse=",")) +}) + test_that("hypeR() is working", { # Testing data testdat <- readRDS(file.path(system.file("extdata", package="hypeR"), "testdat.rds")) From ef8318e7711ce3bfd4933a9ffb0239af3dddd759 Mon Sep 17 00:00:00 2001 From: tetomonti Date: Thu, 31 Aug 2023 10:09:04 -0400 Subject: [PATCH 07/13] bug corrected: ggeplot --> ggplot --- R/ks_enrichment.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ks_enrichment.R b/R/ks_enrichment.R index 88dfc1f..f6b84cd 100644 --- a/R/ks_enrichment.R +++ b/R/ks_enrichment.R @@ -83,7 +83,7 @@ # Enrichment plot p <- if (plotting) { - ggeplot(n.x, y, x.axis, y.axis, plot.title) + + ggplot2::ggplot(n.x, y, x.axis, y.axis, plot.title) + geom_vline(xintercept=leading_edge, linetype="dotted", color="red", size=0.25) } else ggempty() From 03b05bb4da2b25221fe983e2731ce115c12ef21e Mon Sep 17 00:00:00 2001 From: tetomonti Date: Thu, 31 Aug 2023 10:12:47 -0400 Subject: [PATCH 08/13] mistaken correction reverted: ggplot --> ggeplot --- R/ks_enrichment.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ks_enrichment.R b/R/ks_enrichment.R index f6b84cd..88dfc1f 100644 --- a/R/ks_enrichment.R +++ b/R/ks_enrichment.R @@ -83,7 +83,7 @@ # Enrichment plot p <- if (plotting) { - ggplot2::ggplot(n.x, y, x.axis, y.axis, plot.title) + + ggeplot(n.x, y, x.axis, y.axis, plot.title) + geom_vline(xintercept=leading_edge, linetype="dotted", color="red", size=0.25) } else ggempty() From a74b3b8b5de9a426037d55521d0d551478025704 Mon Sep 17 00:00:00 2001 From: andrewdchen Date: Thu, 31 Aug 2023 14:35:31 -0400 Subject: [PATCH 09/13] Fixing broken hyp_dots test --- tests/testthat/test-hyp_dots.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-hyp_dots.R b/tests/testthat/test-hyp_dots.R index 9ffc7f5..af2265c 100644 --- a/tests/testthat/test-hyp_dots.R +++ b/tests/testthat/test-hyp_dots.R @@ -7,7 +7,7 @@ hyp_dots_tests <- function(hyp_obj, return_obj=FALSE) { expect_silent(hyp_dots(hyp_obj, size_by="genesets")) expect_silent(hyp_dots(hyp_obj, size_by="significance")) expect_silent(hyp_dots(hyp_obj, size_by="none")) - p <- hyp_dots(hyp_obj, "gg") + p <- hyp_dots(hyp_obj) expect_is(p, "gg") if (return_obj) return(hyp_obj) } @@ -17,11 +17,11 @@ test_that("hyp_dots() is working", { testdat <- readRDS(file.path(system.file("extdata", package="hypeR"), "testdat.rds")) gsets_obj <- testdat$gsets rgsets_obj <- testdat$rgsets - + # Overrepresentation (signature) signature <- testdat$signature experiment <- testdat$experiment - + hypeR(signature, gsets_obj, test="hypergeometric", background=100) %>% hyp_dots_tests() hypeR(signature, rgsets_obj, test="hypergeometric", background=100) %>% @@ -31,7 +31,7 @@ test_that("hyp_dots() is working", { expect_equal(length(p), 3) expect_equal(names(p), c("Signature 1", "Signature 2", "Signature 3")) expect_is(p[["Signature 3"]], "gg") - + # Enrichment (ranked signature) signature <- names(testdat$weighted_signature) experiment <- lapply(testdat$weighted_experiment, names) @@ -45,11 +45,11 @@ test_that("hyp_dots() is working", { expect_equal(length(p), 3) expect_equal(names(p), c("Signature 1", "Signature 2", "Signature 3")) expect_is(p[["Signature 3"]], "gg") - + # Enrichment (weighted signature) signature <- testdat$weighted_signature experiment <- testdat$weighted_experiment - + hypeR(signature, gsets_obj, test="kstest") %>% hyp_dots_tests() hypeR(signature, rgsets_obj, test="kstest") %>% From 68649b863be211e41d8011dd02c03a738777bcd6 Mon Sep 17 00:00:00 2001 From: tetomonti Date: Fri, 15 Sep 2023 11:46:24 -0400 Subject: [PATCH 10/13] reconciled handling of leading edge --- R/ks_enrichment.R | 134 ++++++++++++++++++++++------------------------ 1 file changed, 65 insertions(+), 69 deletions(-) diff --git a/R/ks_enrichment.R b/R/ks_enrichment.R index 88dfc1f..33dd637 100644 --- a/R/ks_enrichment.R +++ b/R/ks_enrichment.R @@ -21,17 +21,15 @@ plot.title="") { n.y <- length(y) - err = list(score=0, pval=1, leading_edge=NULL, leading_hits=NA, plot=ggempty()) - - if (n.y < 1 ) return(err) - if (any(y > n.x)) return(err) - if (any(y < 1)) return(err) - + err <- list(score = 0, pval = 1, leading_edge = NULL, leading_hits = NULL, plot = ggempty()) + if (n.y < 1 || any(y > n.x) || any(y < 1) ) { + return(err) + } x.axis <- y.axis <- NULL leading_edge <- NULL # recording the x corresponding to the highest y value - # If weights are provided - if (!is.null(weights)) { + ## If weights are provided + if ( !is.null(weights) ) { weights <- abs(weights[y])^weights.pwr Pmis <- rep(1, n.x); Pmis[y] <- 0; Pmis <- cumsum(Pmis); Pmis <- Pmis/(n.x-n.y) @@ -42,9 +40,9 @@ x.axis <- 1:n.x y.axis <- z - - # Without weights - } else { + } + ## Without weights + else { y <- sort(y) n <- n.x*n.y/(n.x + n.y) hit <- 1/n.y @@ -57,7 +55,7 @@ D[y.match] <- (1:n.y) zero <- which(D == 0)[-1] D[zero] <- D[zero-1] - + z <- D*hit-Y*mis score <- if (absolute) max(z)-min(z) else z[leading_edge <- which.max(abs(z))] @@ -75,24 +73,26 @@ } } leading_edge <- x.axis[leading_edge] - leading_hits <- intersect(x.axis[x.axis<=leading_edge],y) + leading_hits <- intersect(x.axis[x.axis <= leading_edge], y) - # One-sided Kolmogorov–Smirnov test + ## One-sided Kolmogorov–Smirnov test results <- suppressWarnings(ks.test(1:n.x, y, alternative="less")) results$statistic <- score # Use the signed statistic - # Enrichment plot - p <- if (plotting) { - ggeplot(n.x, y, x.axis, y.axis, plot.title) + - geom_vline(xintercept=leading_edge, linetype="dotted", color="red", size=0.25) + ## Enrichment plot + p <- if (plotting && n.y > 0) { + ggeplot(n.x, y, x.axis, y.axis, plot.title) + + geom_vline(xintercept = leading_edge, linetype = "dotted", color = "red", size = 0.25) + } else { + ggempty() } - else ggempty() - - return(list(score=as.numeric(results$statistic), - pval=results$p.value, - leading_edge=leading_edge, - leading_hits=leading_hits, - plot=p)) + return(list( + score = as.numeric(results$statistic), + pval = results$p.value, + leading_edge = leading_edge, + leading_hits = leading_hits, + plot = p + )) } #' Enrichment test via one-sided Kolmogorov–Smirnov test #' @@ -105,66 +105,62 @@ #' @return A list of data and plots #' #' @keywords internal -.ks_enrichment <- function(signature, - genesets, - weights=NULL, - weights.pwr=1, - absolute=FALSE, - plotting=TRUE) +.ks_enrichment <- function( + signature, + genesets, + weights = NULL, + weights.pwr = 1, + absolute = FALSE, + plotting = TRUE) { if (!is(genesets, "list")) stop("Error: Expected genesets to be a list of gene sets\n") if (!is.null(weights)) stopifnot(length(signature) == length(weights)) - + signature <- unique(signature) genesets <- lapply(genesets, unique) - - results <- mapply(function(geneset, title) { - + + results <- mapply( function(geneset, title) { ranks <- match(geneset, signature) ranks <- ranks[!is.na(ranks)] ## Run ks-test - results <- .kstest(n.x=length(signature), - y=ranks, - weights=weights, - weights.pwr=weights.pwr, - absolute=absolute, - plotting=plotting, - plot.title=title) - - results[['geneset']] <- length(geneset) - edge_idx <- results[['leading_edge']] + results_in <- .kstest( + n.x = length(signature), + y = ranks, + weights = weights, + weights.pwr = weights.pwr, + absolute = absolute, + plotting = plotting, + plot.title = title + ) + #results_in[["geneset"]] <- length(geneset) + results_in[["geneset"]] <- length(intersect(geneset, signature)) + results_in[["overlap"]] <- length(results_in$leading_hits) + return(results_in) + }, genesets, names(genesets), USE.NAMES = TRUE, SIMPLIFY = FALSE) - if(is.null(edge_idx)) { - results[['hits']] <- NA - results[['overlap']] <- 0 - } else if (!is.null(edge_idx) & edge_idx == 0) { - results[['hits']] <- NA - results[['overlap']] <- 0 - } else { - results[['hits']] <- paste0("'", signature[results[['leading_hits']]], "'", collapse=',') - results[['overlap']] <- edge_idx - } - return(results) - - }, genesets, names(genesets), USE.NAMES=TRUE, SIMPLIFY=FALSE) - results <- do.call(rbind, results) - data <- data.frame(apply(results[,c("score", "pval", "geneset", "overlap")], 2, unlist), - stringsAsFactors = FALSE) + data <- data.frame(apply(results[, c("score", "pval", "geneset", "overlap")], 2, unlist), + stringsAsFactors = FALSE + ) ## add list of genes in the leading edge - data$hits <- results[,"hits"] + data <- data %>% + dplyr::mutate(hits = sapply(results[, "leading_hits"], function(x) paste(signature[x], collapse = ","))) data$score <- signif(data$score, 2) data$pval <- signif(data$pval, 2) data$label <- names(genesets) data$signature <- length(signature) - data$fdr <- signif(p.adjust(data$pval, method="fdr"), 2) + data$fdr <- signif(p.adjust(data$pval, method = "fdr"), 2) data <- data %>% - dplyr::relocate(fdr,.after=pval) %>% - dplyr::relocate(signature,.after=geneset) %>% - dplyr::relocate(label) - plots <- results[,"plot"] + dplyr::relocate(fdr, .after = pval) %>% + dplyr::relocate(signature, .after = geneset) %>% + dplyr::relocate(label) |> + tibble::remove_rownames() # make sure this is OK + plots <- results[, "plot"] - return(list(data=data, - plots=plots)) + return(list( + data = data, + plots = plots, + leading_hits = sapply(results[, "leading_hits"], function(x) signature[x]) + )) } From 4a489fc115da92e4ed910586059aa8c3d5ad3f3f Mon Sep 17 00:00:00 2001 From: tetomonti Date: Fri, 15 Sep 2023 13:47:11 -0400 Subject: [PATCH 11/13] multiple edits, including split of hits by space-comma-space --- R/ggeplot.R | 40 +++++++++++---------- R/ggvenn.R | 2 +- R/ks_enrichment.R | 5 +-- tests/testthat/test-hyp_to_table.R | 37 ++++++++++--------- tests/testthat/test-hype.R | 57 ++++++++++++++++-------------- 5 files changed, 75 insertions(+), 66 deletions(-) diff --git a/R/ggeplot.R b/R/ggeplot.R index 779a648..c46c484 100644 --- a/R/ggeplot.R +++ b/R/ggeplot.R @@ -10,22 +10,26 @@ #' @importFrom ggplot2 qplot aes geom_rug geom_hline geom_vline annotate theme element_text element_blank element_line element_rect #' #' @export -ggeplot <- function(n, positions, x_axis, y_axis, title="") { - score <- which.max(abs(y_axis)) - qplot(x_axis, - y_axis, - main=title, - ylab="Running Enrichment Score", - xlab="Position in Ranked List of Genes", - geom="line")+ - geom_rug(data=data.frame(positions), aes(x=positions), inherit.aes=FALSE)+ - geom_hline(yintercept=0) + - geom_vline(xintercept=n/2, linetype="dotted") + - annotate("point", x=x_axis[score], y=y_axis[score], color="red") + - annotate("text", x=x_axis[score]+n/20, y=y_axis[score], label=round(y_axis[score],2)) + - annotate("point", x=x_axis[score], y=y_axis[score], color="red") + - theme(plot.title=element_text(hjust=0.5), - panel.background=element_blank(), - axis.line=element_line(color="black"), - panel.border=element_rect(color="black", fill=NA, size=1)) +ggeplot <- function(n, positions, x_axis, y_axis, title = "") { + score <- which.max(abs(y_axis)) + DF <- data.frame(x_axis = x_axis, y_axis = y_axis) + ggplot2::ggplot(DF, aes(x = x_axis, y = y_axis)) + + ggplot2::geom_line() + + ggplot2::labs( + title = title, + y = "Running Enrichment Score", + x = "Position in Ranked List of Genes" + ) + + ggplot2::geom_rug(data = data.frame(positions), aes(x = positions), inherit.aes = FALSE) + + ggplot2::geom_hline(yintercept = 0) + + ggplot2::geom_vline(xintercept = n / 2, linetype = "dotted") + + ggplot2::annotate("point", x = x_axis[score], y = y_axis[score], color = "red") + + ggplot2::annotate("text", x = x_axis[score] + n / 20, y = y_axis[score], label = round(y_axis[score], 2)) + + ggplot2::annotate("point", x = x_axis[score], y = y_axis[score], color = "red") + + ggplot2::theme( + plot.title = ggplot2::element_text(hjust = 0.5), + panel.background = ggplot2::element_blank(), + axis.line = ggplot2::element_line(color = "black"), + panel.border = ggplot2::element_rect(color = "black", fill = NA, linewidth = 1) + ) } diff --git a/R/ggvenn.R b/R/ggvenn.R index 60964eb..3853330 100644 --- a/R/ggvenn.R +++ b/R/ggvenn.R @@ -37,7 +37,7 @@ ggvenn <- function(a, b, ga, gb, title="") { paste(gb, " (", x.b, ")", sep=""))) %>% ggplot(aes(x0=x, y0=y, r=c(r.a, r.b), fill=groups)) + - geom_circle(alpha=0.3, size=0.5) + + geom_circle(alpha=0.3, linewidth=0.5) + coord_fixed() + theme_void() + theme(plot.title=element_text(hjust=0.5), diff --git a/R/ks_enrichment.R b/R/ks_enrichment.R index 33dd637..93ce0e7 100644 --- a/R/ks_enrichment.R +++ b/R/ks_enrichment.R @@ -82,7 +82,7 @@ ## Enrichment plot p <- if (plotting && n.y > 0) { ggeplot(n.x, y, x.axis, y.axis, plot.title) + - geom_vline(xintercept = leading_edge, linetype = "dotted", color = "red", size = 0.25) + geom_vline(xintercept = leading_edge, linetype = "dotted", color = "red", linewidth = 0.25) } else { ggempty() } @@ -145,7 +145,8 @@ ) ## add list of genes in the leading edge data <- data %>% - dplyr::mutate(hits = sapply(results[, "leading_hits"], function(x) paste(signature[x], collapse = ","))) + #dplyr::mutate(hits = sapply(results[, "leading_hits"], function(x) paste(signature[x], collapse = ","))) + dplyr::mutate(hits = sapply(results[, "leading_hits"], function(x) paste(signature[x], collapse = " , "))) data$score <- signif(data$score, 2) data$pval <- signif(data$pval, 2) data$label <- names(genesets) diff --git a/tests/testthat/test-hyp_to_table.R b/tests/testthat/test-hyp_to_table.R index dd8d69e..b34a17b 100644 --- a/tests/testthat/test-hyp_to_table.R +++ b/tests/testthat/test-hyp_to_table.R @@ -1,22 +1,21 @@ test_that("hyp_to_table() is working", { + testdat <- readRDS(file.path(system.file("extdata", package = "hypeR"), "testdat.rds")) + gsets_obj <- testdat$gsets + rgsets_obj <- testdat$rgsets - testdat <- readRDS(file.path(system.file("extdata", package="hypeR"), "testdat.rds")) - gsets_obj <- testdat$gsets - rgsets_obj <- testdat$rgsets - - signature <- testdat$signature - experiment <- testdat$experiment - - hyp_obj <- hypeR(signature, gsets_obj) - multihyp_obj <- hypeR(experiment, rgsets_obj) - - # A single file - hyp_to_table(hyp_obj, file_path="signature.txt", sep="\t") - expect_true(file.exists("signature.txt")) - - # Multiple files within a directory - hyp_to_table(multihyp_obj, file_path="experiment", sep="\t") - expect_true(file.exists("experiment/Signature 1.txt")) - expect_true(file.exists("experiment/Signature 2.txt")) - expect_true(file.exists("experiment/Signature 3.txt")) + signature <- testdat$signature + experiment <- testdat$experiment + + hyp_obj <- hypeR(signature, gsets_obj) + multihyp_obj <- hypeR(experiment, rgsets_obj) + + # A single file + hyp_to_table(hyp_obj, file_path = "signature.txt", sep = "\t") + expect_true(file.exists("signature.txt")) + + # Multiple files within a directory + hyp_to_table(multihyp_obj, file_path = "experiment", sep = "\t") + expect_true(file.exists("experiment/Signature 1.txt")) + expect_true(file.exists("experiment/Signature 2.txt")) + expect_true(file.exists("experiment/Signature 3.txt")) }) diff --git a/tests/testthat/test-hype.R b/tests/testthat/test-hype.R index 4f7db97..f5e0cae 100644 --- a/tests/testthat/test-hype.R +++ b/tests/testthat/test-hype.R @@ -15,23 +15,23 @@ hyp_tests <- function(hyp_obj, test_plots=FALSE, return_obj=FALSE) { hypeR_tests <- function(test, signature, experiment, genesets, gsets_obj, rgsets_obj) { # Basic - hypeR(signature, genesets, test=test, background=2520) %>% + hypeR(signature, genesets, test=test, background=2520) |> hyp_tests() # Basic - hypeR(signature, gsets_obj, test=test, background=2520) %>% + hypeR(signature, gsets_obj, test=test, background=2520) |> hyp_tests() # Basic with plots - hypeR(signature, gsets_obj, test=test, background=2520, plotting=TRUE) %>% + hypeR(signature, gsets_obj, test=test, background=2520, plotting=TRUE) |> hyp_tests(test_plots=TRUE) # Test pval_cutoff - hypeR(signature, gsets_obj, test=test, background=100, pval=0.0001) %>% + hypeR(signature, gsets_obj, test=test, background=100, pval=0.0001) |> hyp_tests() # Test fdr_cutoff - hyp_obj <- hypeR(signature, gsets_obj, test=test, background=100, fdr=0.0001) %>% + hyp_obj <- hypeR(signature, gsets_obj, test=test, background=100, fdr=0.0001) |> hyp_tests(return_obj=TRUE) expect_equal(hyp_obj$args$genesets, gsets_obj) @@ -39,7 +39,7 @@ hypeR_tests <- function(test, signature, experiment, genesets, gsets_obj, rgsets expect_equal(hyp_obj$args$fdr, 0.0001) # Test relational gsets - hyp_obj <- hypeR(signature, rgsets_obj, test=test, background=80, pval=0.01) %>% + hyp_obj <- hypeR(signature, rgsets_obj, test=test, background=80, pval=0.01) |> hyp_tests(return_obj=TRUE) expect_is(hyp_obj$args$genesets, "rgsets") expect_is(hyp_obj$args$genesets, "R6") @@ -51,17 +51,17 @@ hypeR_tests <- function(test, signature, experiment, genesets, gsets_obj, rgsets expect_equal(names(multihyp_obj$data), c("Signature 1", "Signature 2", "Signature 3")) # Extracting hyp objects - multihyp_obj$data[["Signature 1"]] %>% + multihyp_obj$data[["Signature 1"]] |> hyp_tests() # Extracting hyp objects with plots multihyp_obj <- hypeR(experiment, gsets_obj, test=test, background=100, plotting=TRUE) - multihyp_obj$data[["Signature 1"]] %>% + multihyp_obj$data[["Signature 1"]] |> hyp_tests(test_plots=TRUE) # Test relational gsets multihyp_obj <- hypeR(experiment, rgsets_obj, test=test, background=100, pval=0.05) - multihyp_obj$data[["Signature 2"]] %>% + multihyp_obj$data[["Signature 2"]] |> hyp_tests() } @@ -89,44 +89,49 @@ test_that("Hypergeometric is working", { # Hypergeometric hyp_obj <- hypeR(s, gs, background=length(bg)) - expect_equal(filter(hyp_obj$data, label == "G1") %>% pull(pval), fisher(s, gs$G1, length(bg))) - expect_equal(filter(hyp_obj$data, label == "G2") %>% pull(pval), fisher(s, gs$G2, length(bg))) - expect_equal(filter(hyp_obj$data, label == "G3") %>% pull(pval), fisher(s, gs$G3, length(bg))) + expect_equal(filter(hyp_obj$data, label == "G1") |> pull(pval), fisher(s, gs$G1, length(bg))) + expect_equal(filter(hyp_obj$data, label == "G2") |> pull(pval), fisher(s, gs$G2, length(bg))) + expect_equal(filter(hyp_obj$data, label == "G3") |> pull(pval), fisher(s, gs$G3, length(bg))) # Hypergeometric - Restrict Genesets to Background hyp_obj <- hypeR(s, gs, background=bg[1:18]) - expect_equal(filter(hyp_obj$data, label == "G1") %>% pull(pval), fisher(s, intersect(gs$G1, bg[1:18]), length(bg[1:18]))) - expect_equal(filter(hyp_obj$data, label == "G2") %>% pull(pval), fisher(s, intersect(gs$G2, bg[1:18]), length(bg[1:18]))) - expect_equal(filter(hyp_obj$data, label == "G3") %>% pull(pval), fisher(s, intersect(gs$G3, bg[1:18]), length(bg[1:18]))) + expect_equal(filter(hyp_obj$data, label == "G1") |> pull(pval), fisher(s, intersect(gs$G1, bg[1:18]), length(bg[1:18]))) + expect_equal(filter(hyp_obj$data, label == "G2") |> pull(pval), fisher(s, intersect(gs$G2, bg[1:18]), length(bg[1:18]))) + expect_equal(filter(hyp_obj$data, label == "G3") |> pull(pval), fisher(s, intersect(gs$G3, bg[1:18]), length(bg[1:18]))) gsets_obj <- gsets$new(gs, quiet=TRUE) hyp_obj <- hypeR(s, gsets_obj, background=bg[1:18]) - expect_equal(filter(hyp_obj$data, label == "G1") %>% pull(pval), fisher(s, intersect(gs$G1, bg[1:18]), length(bg[1:18]))) - expect_equal(filter(hyp_obj$data, label == "G2") %>% pull(pval), fisher(s, intersect(gs$G2, bg[1:18]), length(bg[1:18]))) - expect_equal(filter(hyp_obj$data, label == "G3") %>% pull(pval), fisher(s, intersect(gs$G3, bg[1:18]), length(bg[1:18]))) + expect_equal(filter(hyp_obj$data, label == "G1") |> pull(pval), fisher(s, intersect(gs$G1, bg[1:18]), length(bg[1:18]))) + expect_equal(filter(hyp_obj$data, label == "G2") |> pull(pval), fisher(s, intersect(gs$G2, bg[1:18]), length(bg[1:18]))) + expect_equal(filter(hyp_obj$data, label == "G3") |> pull(pval), fisher(s, intersect(gs$G3, bg[1:18]), length(bg[1:18]))) }) test_that("KS Test is working", { genesets <- msigdb_gsets("Homo sapiens", "C2", "CP:KEGG")$genesets[1:5] - all_genes <- genesets %>% unlist %>% unname + all_genes <- genesets |> unlist(use.names = FALSE) genesets_names <- names(genesets) - experiment <- c(head(genesets[[1]], 5), LETTERS, tail(genesets[[1]], 1)) - # Geneset 1 is top skewed + experiment <- c(head(genesets[[1]], 5), LETTERS, tail(genesets[[1]], 1)) hyp_obj <- hypeR(experiment, genesets, background=2522, test="kstest") - expect_equal(hyp_obj$data[genesets_names[[1]], "hits"] %>% unlist(use.names=FALSE), paste0("'", genesets[[1]][1:5], "'",collapse=",")) - + #expect_equal(hyp_obj$data[genesets_names[[1]], "hits"] |> unlist(use.names=FALSE), paste0("'", genesets[[1]][1:5], "'",collapse=",")) + expect_equal(filter(hyp_obj$data, label == genesets_names[[1]]) |> dplyr::pull(hits) |> unname(), + paste(genesets[[1]][1:5], collapse = " , ")) + # Geneset 2 is mixed experiment <- c(head(genesets[[2]], 8), LETTERS, tail(genesets[[2]], 10)) hyp_obj <- hypeR(experiment, genesets, background=2522, test="kstest") - expect_equal(hyp_obj$data[genesets_names[[2]], "hits"] %>% unlist(use.names=FALSE), paste0("'", genesets[[2]][1:8], "'",collapse=",")) - + #expect_equal(hyp_obj$data[genesets_names[[2]], "hits"] |> unlist(use.names=FALSE), paste0("'", genesets[[2]][1:8], "'",collapse=",")) + expect_equal(filter(hyp_obj$data, label == genesets_names[[2]]) |> dplyr::pull(hits) |> unname(), + paste(genesets[[2]][1:8], collapse = " , ")) + # Geneset 3 is bottom skewed experiment <- c(head(genesets[[3]], 1), LETTERS, tail(genesets[[3]], 8)) hyp_obj <- hypeR(experiment, genesets, background=2522, test="kstest") - expect_equal(hyp_obj$data[genesets_names[[3]], "hits"] %>% unlist(use.names=FALSE), paste0("'", genesets[[3]][1], "'",collapse=",")) + #expect_equal(hyp_obj$data[genesets_names[[3]], "hits"] |> unlist(use.names=FALSE), paste0("'", genesets[[3]][1], "'",collapse=",")) + expect_equal(filter(hyp_obj$data, label == genesets_names[[3]]) |> dplyr::pull(hits) |> unname(), + paste(genesets[[3]][1], collapse = " , ")) }) test_that("hypeR() is working", { From 46f5859fe589f119b53e0b55f1d1c7fe14a2b637 Mon Sep 17 00:00:00 2001 From: tetomonti Date: Fri, 15 Sep 2023 14:18:27 -0400 Subject: [PATCH 12/13] tried to increase internet timeout --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index b5f80e3..4e19cc8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,7 @@ env: - R_BUILD_ARGS="--no-build-vignettes --no-manual" - R_CHECK_ARGS="--no-build-vignettes --no-manual --timings" - _R_CHECK_TIMINGS_="0" + - R_DEFAULT_INTERNET_TIMEOUT="300" r: - release From 73b639a98506d8d6f6d8b32440828aef25f981a2 Mon Sep 17 00:00:00 2001 From: tetomonti Date: Fri, 15 Sep 2023 15:10:55 -0400 Subject: [PATCH 13/13] switched |> to %>% for backward compatibility --- R/ks_enrichment.R | 2 +- tests/testthat/test-hype.R | 50 +++++++++++++++++++------------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/R/ks_enrichment.R b/R/ks_enrichment.R index 93ce0e7..0a4aad9 100644 --- a/R/ks_enrichment.R +++ b/R/ks_enrichment.R @@ -155,7 +155,7 @@ data <- data %>% dplyr::relocate(fdr, .after = pval) %>% dplyr::relocate(signature, .after = geneset) %>% - dplyr::relocate(label) |> + dplyr::relocate(label) %>% tibble::remove_rownames() # make sure this is OK plots <- results[, "plot"] diff --git a/tests/testthat/test-hype.R b/tests/testthat/test-hype.R index f5e0cae..485d310 100644 --- a/tests/testthat/test-hype.R +++ b/tests/testthat/test-hype.R @@ -15,23 +15,23 @@ hyp_tests <- function(hyp_obj, test_plots=FALSE, return_obj=FALSE) { hypeR_tests <- function(test, signature, experiment, genesets, gsets_obj, rgsets_obj) { # Basic - hypeR(signature, genesets, test=test, background=2520) |> + hypeR(signature, genesets, test=test, background=2520) %>% hyp_tests() # Basic - hypeR(signature, gsets_obj, test=test, background=2520) |> + hypeR(signature, gsets_obj, test=test, background=2520) %>% hyp_tests() # Basic with plots - hypeR(signature, gsets_obj, test=test, background=2520, plotting=TRUE) |> + hypeR(signature, gsets_obj, test=test, background=2520, plotting=TRUE) %>% hyp_tests(test_plots=TRUE) # Test pval_cutoff - hypeR(signature, gsets_obj, test=test, background=100, pval=0.0001) |> + hypeR(signature, gsets_obj, test=test, background=100, pval=0.0001) %>% hyp_tests() # Test fdr_cutoff - hyp_obj <- hypeR(signature, gsets_obj, test=test, background=100, fdr=0.0001) |> + hyp_obj <- hypeR(signature, gsets_obj, test=test, background=100, fdr=0.0001) %>% hyp_tests(return_obj=TRUE) expect_equal(hyp_obj$args$genesets, gsets_obj) @@ -39,7 +39,7 @@ hypeR_tests <- function(test, signature, experiment, genesets, gsets_obj, rgsets expect_equal(hyp_obj$args$fdr, 0.0001) # Test relational gsets - hyp_obj <- hypeR(signature, rgsets_obj, test=test, background=80, pval=0.01) |> + hyp_obj <- hypeR(signature, rgsets_obj, test=test, background=80, pval=0.01) %>% hyp_tests(return_obj=TRUE) expect_is(hyp_obj$args$genesets, "rgsets") expect_is(hyp_obj$args$genesets, "R6") @@ -51,17 +51,17 @@ hypeR_tests <- function(test, signature, experiment, genesets, gsets_obj, rgsets expect_equal(names(multihyp_obj$data), c("Signature 1", "Signature 2", "Signature 3")) # Extracting hyp objects - multihyp_obj$data[["Signature 1"]] |> + multihyp_obj$data[["Signature 1"]] %>% hyp_tests() # Extracting hyp objects with plots multihyp_obj <- hypeR(experiment, gsets_obj, test=test, background=100, plotting=TRUE) - multihyp_obj$data[["Signature 1"]] |> + multihyp_obj$data[["Signature 1"]] %>% hyp_tests(test_plots=TRUE) # Test relational gsets multihyp_obj <- hypeR(experiment, rgsets_obj, test=test, background=100, pval=0.05) - multihyp_obj$data[["Signature 2"]] |> + multihyp_obj$data[["Signature 2"]] %>% hyp_tests() } @@ -89,48 +89,48 @@ test_that("Hypergeometric is working", { # Hypergeometric hyp_obj <- hypeR(s, gs, background=length(bg)) - expect_equal(filter(hyp_obj$data, label == "G1") |> pull(pval), fisher(s, gs$G1, length(bg))) - expect_equal(filter(hyp_obj$data, label == "G2") |> pull(pval), fisher(s, gs$G2, length(bg))) - expect_equal(filter(hyp_obj$data, label == "G3") |> pull(pval), fisher(s, gs$G3, length(bg))) + expect_equal(filter(hyp_obj$data, label == "G1") %>% pull(pval), fisher(s, gs$G1, length(bg))) + expect_equal(filter(hyp_obj$data, label == "G2") %>% pull(pval), fisher(s, gs$G2, length(bg))) + expect_equal(filter(hyp_obj$data, label == "G3") %>% pull(pval), fisher(s, gs$G3, length(bg))) # Hypergeometric - Restrict Genesets to Background hyp_obj <- hypeR(s, gs, background=bg[1:18]) - expect_equal(filter(hyp_obj$data, label == "G1") |> pull(pval), fisher(s, intersect(gs$G1, bg[1:18]), length(bg[1:18]))) - expect_equal(filter(hyp_obj$data, label == "G2") |> pull(pval), fisher(s, intersect(gs$G2, bg[1:18]), length(bg[1:18]))) - expect_equal(filter(hyp_obj$data, label == "G3") |> pull(pval), fisher(s, intersect(gs$G3, bg[1:18]), length(bg[1:18]))) + expect_equal(filter(hyp_obj$data, label == "G1") %>% pull(pval), fisher(s, intersect(gs$G1, bg[1:18]), length(bg[1:18]))) + expect_equal(filter(hyp_obj$data, label == "G2") %>% pull(pval), fisher(s, intersect(gs$G2, bg[1:18]), length(bg[1:18]))) + expect_equal(filter(hyp_obj$data, label == "G3") %>% pull(pval), fisher(s, intersect(gs$G3, bg[1:18]), length(bg[1:18]))) gsets_obj <- gsets$new(gs, quiet=TRUE) hyp_obj <- hypeR(s, gsets_obj, background=bg[1:18]) - expect_equal(filter(hyp_obj$data, label == "G1") |> pull(pval), fisher(s, intersect(gs$G1, bg[1:18]), length(bg[1:18]))) - expect_equal(filter(hyp_obj$data, label == "G2") |> pull(pval), fisher(s, intersect(gs$G2, bg[1:18]), length(bg[1:18]))) - expect_equal(filter(hyp_obj$data, label == "G3") |> pull(pval), fisher(s, intersect(gs$G3, bg[1:18]), length(bg[1:18]))) + expect_equal(filter(hyp_obj$data, label == "G1") %>% pull(pval), fisher(s, intersect(gs$G1, bg[1:18]), length(bg[1:18]))) + expect_equal(filter(hyp_obj$data, label == "G2") %>% pull(pval), fisher(s, intersect(gs$G2, bg[1:18]), length(bg[1:18]))) + expect_equal(filter(hyp_obj$data, label == "G3") %>% pull(pval), fisher(s, intersect(gs$G3, bg[1:18]), length(bg[1:18]))) }) test_that("KS Test is working", { genesets <- msigdb_gsets("Homo sapiens", "C2", "CP:KEGG")$genesets[1:5] - all_genes <- genesets |> unlist(use.names = FALSE) + all_genes <- genesets %>% unlist(use.names = FALSE) genesets_names <- names(genesets) # Geneset 1 is top skewed experiment <- c(head(genesets[[1]], 5), LETTERS, tail(genesets[[1]], 1)) hyp_obj <- hypeR(experiment, genesets, background=2522, test="kstest") - #expect_equal(hyp_obj$data[genesets_names[[1]], "hits"] |> unlist(use.names=FALSE), paste0("'", genesets[[1]][1:5], "'",collapse=",")) - expect_equal(filter(hyp_obj$data, label == genesets_names[[1]]) |> dplyr::pull(hits) |> unname(), + #expect_equal(hyp_obj$data[genesets_names[[1]], "hits"] %>% unlist(use.names=FALSE), paste0("'", genesets[[1]][1:5], "'",collapse=",")) + expect_equal(filter(hyp_obj$data, label == genesets_names[[1]]) %>% dplyr::pull(hits) %>% unname(), paste(genesets[[1]][1:5], collapse = " , ")) # Geneset 2 is mixed experiment <- c(head(genesets[[2]], 8), LETTERS, tail(genesets[[2]], 10)) hyp_obj <- hypeR(experiment, genesets, background=2522, test="kstest") - #expect_equal(hyp_obj$data[genesets_names[[2]], "hits"] |> unlist(use.names=FALSE), paste0("'", genesets[[2]][1:8], "'",collapse=",")) - expect_equal(filter(hyp_obj$data, label == genesets_names[[2]]) |> dplyr::pull(hits) |> unname(), + #expect_equal(hyp_obj$data[genesets_names[[2]], "hits"] %>% unlist(use.names=FALSE), paste0("'", genesets[[2]][1:8], "'",collapse=",")) + expect_equal(filter(hyp_obj$data, label == genesets_names[[2]]) %>% dplyr::pull(hits) %>% unname(), paste(genesets[[2]][1:8], collapse = " , ")) # Geneset 3 is bottom skewed experiment <- c(head(genesets[[3]], 1), LETTERS, tail(genesets[[3]], 8)) hyp_obj <- hypeR(experiment, genesets, background=2522, test="kstest") - #expect_equal(hyp_obj$data[genesets_names[[3]], "hits"] |> unlist(use.names=FALSE), paste0("'", genesets[[3]][1], "'",collapse=",")) - expect_equal(filter(hyp_obj$data, label == genesets_names[[3]]) |> dplyr::pull(hits) |> unname(), + #expect_equal(hyp_obj$data[genesets_names[[3]], "hits"] %>% unlist(use.names=FALSE), paste0("'", genesets[[3]][1], "'",collapse=",")) + expect_equal(filter(hyp_obj$data, label == genesets_names[[3]]) %>% dplyr::pull(hits) %>% unname(), paste(genesets[[3]][1], collapse = " , ")) })