From 4731ba52da1f325084598e80a290bc6d88638c4c Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 18 Sep 2023 13:26:14 -0500 Subject: [PATCH 01/82] add ramp mapping --- R/idMapping.R | 5 +- R/idMappingMetabolites.R | 148 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 R/idMappingMetabolites.R diff --git a/R/idMapping.R b/R/idMapping.R index 87020ed..0a28e6a 100644 --- a/R/idMapping.R +++ b/R/idMapping.R @@ -53,7 +53,10 @@ idMapping <- function(organism="hsapiens", dataType="list", inputGeneFile=NULL, ##########gene level ID Mapping########## if(standardSource=="entrezgene"){ idMap <- idMappingGene(organism=organism, dataType=dataType, inputGeneFile=inputGeneFile, inputGene=inputGene, sourceIdType=sourceIdType, targetIdType=targetIdType, collapseMethod=collapseMethod, mappingOutput=mappingOutput, outputFileName=outputFileName, hostName=hostName) - } else { + } else if(standardSource == "rampc"){ + idMap <- idMappingMetabolites(organism=organism, dataType=dataType, inputGeneFile=inputGeneFile, inputGene=inputGene, sourceIdType=sourceIdType, targetIdType=targetIdType, standardId = standardSource, collapseMethod=collapseMethod, mappingOutput=mappingOutput, outputFileName=outputFileName, hostName=hostName) + } + else { idMap <- idMappingPhosphosite(organism=organism, dataType=dataType, inputGeneFile=inputGeneFile, inputGene=inputGene, sourceIdType=sourceIdType, targetIdType=targetIdType, collapseMethod=collapseMethod, mappingOutput=mappingOutput, outputFileName=outputFileName, hostName=hostName) } diff --git a/R/idMappingMetabolites.R b/R/idMappingMetabolites.R new file mode 100644 index 0000000..84b7c70 --- /dev/null +++ b/R/idMappingMetabolites.R @@ -0,0 +1,148 @@ +#' @importFrom httr POST content +#' @importFrom dplyr right_join select left_join %>% +idMappingMetabolites <- function(organism="hsapiens", dataType="list", inputGeneFile=NULL, inputGene=NULL, sourceIdType, targetIdType, standardId, collapseMethod="mean", mappingOutput=FALSE, outputFileName="", hostName="https://www.webgestalt.org/") { + + ###########Check input data type############### + inputGene <- idMappingInput(dataType=dataType,inputGeneFile=inputGeneFile,inputGene=inputGene) + + ##########ID Mapping Specify to phosphosite level############### + if(dataType=="list"){ + inputGeneL <- unique(inputGene) + } + + if(dataType=="rnk"){ + ######Collapse the gene ids with multiple scores########## + x <- tapply(inputGene$score, inputGene$gene, collapseMethod) + inputGene <- data.frame(gene=names(x),score=as.numeric(x),stringsAsFactors=FALSE) + inputGeneL <- inputGene$gene + colnames(inputGene) <- c(sourceIdType,"score") + } + + if(dataType=="gmt"){ + colnames(inputGene) <- c("geneSet", "link", sourceIdType) + inputGeneL <- unique(inputGene$gene) + } + + if (startsWith(hostName, "file://")) { + sourceMap <- read_tsv( + removeFileProtocol(file.path(hostName, "xref", paste(organism, sourceIdType, paste0(standardId,".table"), sep="_"))), + col_names=c(standardId, "userId"), col_types="cc", quote="" + ) %>% filter(.data$userId %in% inputGeneL) + if (targetIdType == standardId || targetIdType == sourceIdType) { + mappedInputGene <- sourceMap + } else { + targetMap <- read_tsv( + removeFileProtocol(file.path(hostName, "xref", paste(organism, targetIdType, paste0(standardId,".table"), sep="_"))), + col_names=c(standardId, targetIdType), col_types="cc", quote="" + ) + mappedInputGene <- inner_join(sourceMap, targetMap, by=c(standardId)) + } + if (nrow(mappedInputGene) == 0) { return(idMappingError("empty")) } + mappedInputGene <- select(mappedInputGene, .data$userId, targetIdType) + unmappedIds <- setdiff(inputGeneL, mappedInputGene$userId) + } else { + response <- POST(file.path(hostName, "api", "idmapping"), encode="json", + body=list(organism=organism, sourceType=sourceIdType, + targetType=targetIdType, ids=inputGeneL, standardId=standardId) + ) + if (response$status_code != 200) { + stop(webRequestError(response)) + } + mapRes <- content(response) + if (mapRes$status == 1) { + stop(webApiError(mapRes)) + } + + mappedIds <- mapRes$mapped + + unmappedIds <- unlist(mapRes$unmapped) + + if (length(mappedIds) == 0) { stop(idMappingError("empty")) } + names <- c("sourceId", "targetId") + mappedInputGene <- data.frame(matrix(unlist(lapply(replace_null(mappedIds), FUN=function(x) { x[names] })), nrow=length(mappedIds), byrow=TRUE), stringsAsFactors=FALSE) + colnames(mappedInputGene) <- c("userId", targetIdType) + unmappedIds <- append(unmappedIds, mappedInputGene[duplicated(mappedInputGene$rampc),]$userId) + mappedInputGene <- mappedInputGene[!duplicated(mappedInputGene$rampc),] + response <- POST(file.path(hostName, "api", "idmapping"), encode="json", + body=list(organism=organism, sourceType="rampc", + targetType="metabolite_name", ids=mappedInputGene$rampc, standardId=standardId) + ) + mapRes <- content(response) + if (mapRes$status == 1) { + stop(webApiError(mapRes)) + } + newMapped <- mapRes$mapped + meta_names <- data.frame(matrix(unlist(lapply(replace_null(newMapped), FUN=function(x) { x[names] })), nrow=length(newMapped), byrow=TRUE), stringsAsFactors=FALSE) + colnames(meta_names) <- c("id", "meta") + } + + + + mappedInputGene$geneSymbol <- mappedInputGene$userId + mappedInputGene$geneName <- meta_names$meta + # mappedInputGene$gLink <- mappedInputGene$userId + + # Link + # outlink <- "NONE" + # end <- "" + old_id_type <- sourceIdType + sourceIdType <- tolower(sourceIdType) + if (sourceIdType == "hmdb"){ + mappedInputGene$gLink <- paste0("https://www.hmdb.ca/metabolites/", replace_prefix(mappedInputGene$userId, "hmdb:")) + } else if (sourceIdType == "chebi"){ + mappedInputGene$gLink <- paste0("https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:", replace_prefix(mappedInputGene$userId, "chebi:")) + } else if (sourceIdType == "cas"){ + mappedInputGene$gLink <- paste0("https://commonchemistry.cas.org/detail?cas_rn=", replace_prefix(mappedInputGene$userId, "cas:")) + } else if (sourceIdType == "chemspider"){ + mappedInputGene$gLink <- paste0("https://www.chemspider.com/Chemical-Structure.", replace_prefix(mappedInputGene$userId, "chemspider:"), ".html") + } else if (sourceIdType == "kegg") { + mapppedInputGene$gLink <- paste0("https://www.genome.jp/entry/", replace_prefix(mappedInputGene$userId, "kegg:")) + } else if (sourceIdType == "kegg_glycan"){ + mapppedInputGene$gLink <- paste0("https://www.genome.jp/entry/", replace_prefix(mappedInputGene$userId, "kegg_glycan:")) + } else if (sourceIdType == "lipidbank"){ + cuts <- replace_prefix(mappedInputGene$userId, "lipidbank:") + mapppedInputGene$gLink <- paste0("https://lipidbank.jp/", sapply(cuts, function(x) return(substring(x, 1, 3)))) + } else if (sourceIdType == "lipidmaps"){ + mapppedInputGene$gLink <- paste0("https://www.lipidmaps.org/databases/lmsd/", replace_prefix(mappedInputGene$userId, "lipidmaps:")) + } else if (sourceIdType == "plantfa"){ + mapppedInputGene$gLink <- paste0("https://plantfadb.org/fatty_acids/", replace_prefix(mappedInputGene$userId, "plantfa:")) + } else if (sourceIdType == "pubchem"){ + mapppedInputGene$gLink <- paste0("https://pubchem.ncbi.nlm.nih.gov/compound/", replace_prefix(mappedInputGene$userId, "pubchem:")) + } else if (sourceIdType == "swisslipids"){ + mapppedInputGene$gLink <- paste0("http://www.swisslipids.org/#/entity/", replace_prefix(mappedInputGene$userId, "swisslipids:"), "/") + } else if (sourceIdType == "wikidata"){ + mapppedInputGene$gLink <- paste0("https://www.wikidata.org/wiki/", replace_prefix(mappedInputGene$userId, "wikidata:")) + } else { + mapppedInputGene$gLink <- paste0("URL NOT FOUND FOR TYPE ", sourceIdType) + } + + inputGene <- mappedInputGene + # if(dataType=="list"){ + # inputGene <- mappedInputGene + # } + + # if(dataType=="rnk"){ + # inputGene <- mappedInputGene + # } + + # if(dataType=="gmt"){ + # inputGene <- mappedInputGene + # } + + #############Output####################### + if (mappingOutput) { + idMappingOutput(outputFileName, inputGene, unmappedIds, dataType, old_id_type, targetIdType=targetIdType) + } + r <- list(mapped=inputGene,unmapped=unmappedIds) + return(r) +} + +replace_prefix <- function(x, prefix){ + return(sapply(x, function(y) return(sub(prefix, "", y, ignore.case = TRUE)))) +} + +.combineG <- function(e){ + e <- e[-length(e)] + e <- paste(e,collapse="_") + return(e) +} From bd50cb146dcbb788c3753f4790931e880c15cbdf Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 10 Apr 2023 16:22:56 -0500 Subject: [PATCH 02/82] add install script --- install | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100755 install diff --git a/install b/install new file mode 100755 index 0000000..bcd94c4 --- /dev/null +++ b/install @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +R -e "library(utils) +library(devtools) +remove.packages('WebGestaltR') +install('~/Code/webgestaltr-metabolomics/') +q()" +echo "New version installed" From ecf08bb5390b10d8492c2fe38d4be6327ba4afd8 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 11 Apr 2023 16:30:07 -0500 Subject: [PATCH 03/82] changed mappped to mapped :skull: --- R/enrichResultSection.R | 48 +++++++++++++++++--------------- R/idMappingMetabolites.R | 27 ++++++++---------- inst/templates/template.mustache | 2 +- install | 2 +- 4 files changed, 39 insertions(+), 40 deletions(-) diff --git a/R/enrichResultSection.R b/R/enrichResultSection.R index 48ec77c..52075c8 100644 --- a/R/enrichResultSection.R +++ b/R/enrichResultSection.R @@ -8,28 +8,30 @@ #' @keywords internal #' enrichResultSection <- function(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters) { - if ('database' %in% colnames(geneSet)) { - #multiple databases - netDatabases <- names(geneSetNet[!sapply(geneSetNet, is.null)]) - setSource <- geneSet %>% select(.data$geneSet, .data$database) %>% - distinct() %>% - filter(.data$geneSet %in% enrichedSig$geneSet) - setsWithNetJson <- toJSON((filter(setSource, .data$database %in% netDatabases))$geneSet) - hasGeneSetDag <- length(geneSetDag[!sapply(geneSetDag, is.null)]) > 0 - hasMultipleDatabases <- TRUE - } else { - setsWithNetJson <- toJSON(!is.null(geneSetNet), auto_unbox=TRUE) - hasGeneSetDag <- !is.null(geneSetDag) - hasMultipleDatabases <- FALSE - } + if ("database" %in% colnames(geneSet)) { + # multiple databases + netDatabases <- names(geneSetNet[!sapply(geneSetNet, is.null)]) + setSource <- geneSet %>% + select(.data$geneSet, .data$database) %>% + distinct() %>% + filter(.data$geneSet %in% enrichedSig$geneSet) + setsWithNetJson <- toJSON((filter(setSource, .data$database %in% netDatabases))$geneSet) + hasGeneSetDag <- length(geneSetDag[!sapply(geneSetDag, is.null)]) > 0 + hasMultipleDatabases <- TRUE + } else { + setsWithNetJson <- toJSON(!is.null(geneSetNet), auto_unbox = TRUE) + hasGeneSetDag <- !is.null(geneSetDag) + hasMultipleDatabases <- FALSE + } - data <- list(methodIsOra=enrichMethod=='ORA', - hasGeneSetDag=hasGeneSetDag, - hasMultipleDatabases=hasMultipleDatabases, - setsWithNetJson=setsWithNetJson, - hasAp=!is.null(clusters$ap), - hasWsc=!is.null(clusters$wsc) - ) - template <- readLines(system.file("templates/enrichResultSection.mustache", package="WebGestaltR")) - return(whisker.render(template, data)) + data <- list( + methodIsOra = enrichMethod == "ORA", + hasGeneSetDag = hasGeneSetDag, + hasMultipleDatabases = hasMultipleDatabases, + setsWithNetJson = setsWithNetJson, + hasAp = !is.null(clusters$ap), + hasWsc = !is.null(clusters$wsc) + ) + template <- readLines(system.file("templates/enrichResultSection.mustache", package = "WebGestaltR")) + return(whisker.render(template, data)) } diff --git a/R/idMappingMetabolites.R b/R/idMappingMetabolites.R index 84b7c70..4dbe625 100644 --- a/R/idMappingMetabolites.R +++ b/R/idMappingMetabolites.R @@ -22,7 +22,7 @@ idMappingMetabolites <- function(organism="hsapiens", dataType="list", inputGene colnames(inputGene) <- c("geneSet", "link", sourceIdType) inputGeneL <- unique(inputGene$gene) } - + mappedInputGene <- NULL if (startsWith(hostName, "file://")) { sourceMap <- read_tsv( removeFileProtocol(file.path(hostName, "xref", paste(organism, sourceIdType, paste0(standardId,".table"), sep="_"))), @@ -60,7 +60,8 @@ idMappingMetabolites <- function(organism="hsapiens", dataType="list", inputGene if (length(mappedIds) == 0) { stop(idMappingError("empty")) } names <- c("sourceId", "targetId") mappedInputGene <- data.frame(matrix(unlist(lapply(replace_null(mappedIds), FUN=function(x) { x[names] })), nrow=length(mappedIds), byrow=TRUE), stringsAsFactors=FALSE) - colnames(mappedInputGene) <- c("userId", targetIdType) + + colnames(mappedInputGene) <- c("userId", "rampc") unmappedIds <- append(unmappedIds, mappedInputGene[duplicated(mappedInputGene$rampc),]$userId) mappedInputGene <- mappedInputGene[!duplicated(mappedInputGene$rampc),] response <- POST(file.path(hostName, "api", "idmapping"), encode="json", @@ -75,9 +76,6 @@ idMappingMetabolites <- function(organism="hsapiens", dataType="list", inputGene meta_names <- data.frame(matrix(unlist(lapply(replace_null(newMapped), FUN=function(x) { x[names] })), nrow=length(newMapped), byrow=TRUE), stringsAsFactors=FALSE) colnames(meta_names) <- c("id", "meta") } - - - mappedInputGene$geneSymbol <- mappedInputGene$userId mappedInputGene$geneName <- meta_names$meta # mappedInputGene$gLink <- mappedInputGene$userId @@ -96,26 +94,25 @@ idMappingMetabolites <- function(organism="hsapiens", dataType="list", inputGene } else if (sourceIdType == "chemspider"){ mappedInputGene$gLink <- paste0("https://www.chemspider.com/Chemical-Structure.", replace_prefix(mappedInputGene$userId, "chemspider:"), ".html") } else if (sourceIdType == "kegg") { - mapppedInputGene$gLink <- paste0("https://www.genome.jp/entry/", replace_prefix(mappedInputGene$userId, "kegg:")) + mappedInputGene$gLink <- paste0("https://www.genome.jp/entry/", replace_prefix(mappedInputGene$userId, "kegg:")) } else if (sourceIdType == "kegg_glycan"){ - mapppedInputGene$gLink <- paste0("https://www.genome.jp/entry/", replace_prefix(mappedInputGene$userId, "kegg_glycan:")) + mappedInputGene$gLink <- paste0("https://www.genome.jp/entry/", replace_prefix(mappedInputGene$userId, "kegg_glycan:")) } else if (sourceIdType == "lipidbank"){ cuts <- replace_prefix(mappedInputGene$userId, "lipidbank:") - mapppedInputGene$gLink <- paste0("https://lipidbank.jp/", sapply(cuts, function(x) return(substring(x, 1, 3)))) + mappedInputGene$gLink <- paste0("https://lipidbank.jp/", sapply(cuts, function(x) return(substring(x, 1, 3)))) } else if (sourceIdType == "lipidmaps"){ - mapppedInputGene$gLink <- paste0("https://www.lipidmaps.org/databases/lmsd/", replace_prefix(mappedInputGene$userId, "lipidmaps:")) + mappedInputGene$gLink <- paste0("https://www.lipidmaps.org/databases/lmsd/", replace_prefix(mappedInputGene$userId, "lipidmaps:")) } else if (sourceIdType == "plantfa"){ - mapppedInputGene$gLink <- paste0("https://plantfadb.org/fatty_acids/", replace_prefix(mappedInputGene$userId, "plantfa:")) + mappedInputGene$gLink <- paste0("https://plantfadb.org/fatty_acids/", replace_prefix(mappedInputGene$userId, "plantfa:")) } else if (sourceIdType == "pubchem"){ - mapppedInputGene$gLink <- paste0("https://pubchem.ncbi.nlm.nih.gov/compound/", replace_prefix(mappedInputGene$userId, "pubchem:")) + mappedInputGene$gLink <- paste0("https://pubchem.ncbi.nlm.nih.gov/compound/", replace_prefix(mappedInputGene$userId, "pubchem:")) } else if (sourceIdType == "swisslipids"){ - mapppedInputGene$gLink <- paste0("http://www.swisslipids.org/#/entity/", replace_prefix(mappedInputGene$userId, "swisslipids:"), "/") + mappedInputGene$gLink <- paste0("http://www.swisslipids.org/#/entity/", replace_prefix(mappedInputGene$userId, "swisslipids:"), "/") } else if (sourceIdType == "wikidata"){ - mapppedInputGene$gLink <- paste0("https://www.wikidata.org/wiki/", replace_prefix(mappedInputGene$userId, "wikidata:")) + mappedInputGene$gLink <- paste0("https://www.wikidata.org/wiki/", replace_prefix(mappedInputGene$userId, "wikidata:")) } else { - mapppedInputGene$gLink <- paste0("URL NOT FOUND FOR TYPE ", sourceIdType) + mappedInputGene$gLink <- paste0("URL NOT FOUND FOR TYPE ", sourceIdType) } - inputGene <- mappedInputGene # if(dataType=="list"){ # inputGene <- mappedInputGene diff --git a/inst/templates/template.mustache b/inst/templates/template.mustache index 61d22e9..3ec0862 100644 --- a/inst/templates/template.mustache +++ b/inst/templates/template.mustache @@ -25,7 +25,7 @@ ga('send', 'pageview'); -{{>header}} +{{>header}}
diff --git a/install b/install index bcd94c4..58d69bb 100755 --- a/install +++ b/install @@ -2,6 +2,6 @@ R -e "library(utils) library(devtools) remove.packages('WebGestaltR') -install('~/Code/webgestaltr-metabolomics/') +install('.') q()" echo "New version installed" From b65ce889eb3f675877796adeef57c5f60548caed Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 18 Apr 2023 09:35:39 -0500 Subject: [PATCH 04/82] formatting --- R/idMappingMetabolites.R | 260 ++++++++++---------- inst/templates/enrichResultSection.mustache | 6 +- 2 files changed, 135 insertions(+), 131 deletions(-) diff --git a/R/idMappingMetabolites.R b/R/idMappingMetabolites.R index 4dbe625..b3774f7 100644 --- a/R/idMappingMetabolites.R +++ b/R/idMappingMetabolites.R @@ -1,145 +1,149 @@ #' @importFrom httr POST content #' @importFrom dplyr right_join select left_join %>% -idMappingMetabolites <- function(organism="hsapiens", dataType="list", inputGeneFile=NULL, inputGene=NULL, sourceIdType, targetIdType, standardId, collapseMethod="mean", mappingOutput=FALSE, outputFileName="", hostName="https://www.webgestalt.org/") { +idMappingMetabolites <- function(organism = "hsapiens", dataType = "list", inputGeneFile = NULL, inputGene = NULL, sourceIdType, targetIdType, standardId, collapseMethod = "mean", mappingOutput = FALSE, outputFileName = "", hostName = "https://www.webgestalt.org/") { + ########### Check input data type############### + inputGene <- idMappingInput(dataType = dataType, inputGeneFile = inputGeneFile, inputGene = inputGene) - ###########Check input data type############### - inputGene <- idMappingInput(dataType=dataType,inputGeneFile=inputGeneFile,inputGene=inputGene) + ########## ID Mapping Specify to phosphosite level############### + if (dataType == "list") { + inputGeneL <- unique(inputGene) + } - ##########ID Mapping Specify to phosphosite level############### - if(dataType=="list"){ - inputGeneL <- unique(inputGene) - } + if (dataType == "rnk") { + ###### Collapse the gene ids with multiple scores########## + x <- tapply(inputGene$score, inputGene$gene, collapseMethod) + inputGene <- data.frame(gene = names(x), score = as.numeric(x), stringsAsFactors = FALSE) + inputGeneL <- inputGene$gene + colnames(inputGene) <- c(sourceIdType, "score") + } - if(dataType=="rnk"){ - ######Collapse the gene ids with multiple scores########## - x <- tapply(inputGene$score, inputGene$gene, collapseMethod) - inputGene <- data.frame(gene=names(x),score=as.numeric(x),stringsAsFactors=FALSE) - inputGeneL <- inputGene$gene - colnames(inputGene) <- c(sourceIdType,"score") - } + if (dataType == "gmt") { + colnames(inputGene) <- c("geneSet", "link", sourceIdType) + inputGeneL <- unique(inputGene$gene) + } + mappedInputGene <- NULL + if (startsWith(hostName, "file://")) { + sourceMap <- read_tsv( + removeFileProtocol(file.path(hostName, "xref", paste(organism, sourceIdType, paste0(standardId, ".table"), sep = "_"))), + col_names = c(standardId, "userId"), col_types = "cc", quote = "" + ) %>% filter(.data$userId %in% inputGeneL) + if (targetIdType == standardId || targetIdType == sourceIdType) { + mappedInputGene <- sourceMap + } else { + targetMap <- read_tsv( + removeFileProtocol(file.path(hostName, "xref", paste(organism, targetIdType, paste0(standardId, ".table"), sep = "_"))), + col_names = c(standardId, targetIdType), col_types = "cc", quote = "" + ) + mappedInputGene <- inner_join(sourceMap, targetMap, by = c(standardId)) + } + if (nrow(mappedInputGene) == 0) { + return(idMappingError("empty")) + } + mappedInputGene <- select(mappedInputGene, .data$userId, targetIdType) + unmappedIds <- setdiff(inputGeneL, mappedInputGene$userId) + } else { + response <- POST(file.path(hostName, "api", "idmapping"), + encode = "json", + body = list( + organism = organism, sourceType = sourceIdType, + targetType = targetIdType, ids = inputGeneL, standardId = standardId + ) + ) + if (response$status_code != 200) { + stop(webRequestError(response)) + } + mapRes <- content(response) + if (mapRes$status == 1) { + stop(webApiError(mapRes)) + } - if(dataType=="gmt"){ - colnames(inputGene) <- c("geneSet", "link", sourceIdType) - inputGeneL <- unique(inputGene$gene) - } - mappedInputGene <- NULL - if (startsWith(hostName, "file://")) { - sourceMap <- read_tsv( - removeFileProtocol(file.path(hostName, "xref", paste(organism, sourceIdType, paste0(standardId,".table"), sep="_"))), - col_names=c(standardId, "userId"), col_types="cc", quote="" - ) %>% filter(.data$userId %in% inputGeneL) - if (targetIdType == standardId || targetIdType == sourceIdType) { - mappedInputGene <- sourceMap - } else { - targetMap <- read_tsv( - removeFileProtocol(file.path(hostName, "xref", paste(organism, targetIdType, paste0(standardId,".table"), sep="_"))), - col_names=c(standardId, targetIdType), col_types="cc", quote="" - ) - mappedInputGene <- inner_join(sourceMap, targetMap, by=c(standardId)) - } - if (nrow(mappedInputGene) == 0) { return(idMappingError("empty")) } - mappedInputGene <- select(mappedInputGene, .data$userId, targetIdType) - unmappedIds <- setdiff(inputGeneL, mappedInputGene$userId) - } else { - response <- POST(file.path(hostName, "api", "idmapping"), encode="json", - body=list(organism=organism, sourceType=sourceIdType, - targetType=targetIdType, ids=inputGeneL, standardId=standardId) - ) - if (response$status_code != 200) { - stop(webRequestError(response)) - } - mapRes <- content(response) - if (mapRes$status == 1) { - stop(webApiError(mapRes)) - } + mappedIds <- mapRes$mapped - mappedIds <- mapRes$mapped - - unmappedIds <- unlist(mapRes$unmapped) + unmappedIds <- unlist(mapRes$unmapped) - if (length(mappedIds) == 0) { stop(idMappingError("empty")) } - names <- c("sourceId", "targetId") - mappedInputGene <- data.frame(matrix(unlist(lapply(replace_null(mappedIds), FUN=function(x) { x[names] })), nrow=length(mappedIds), byrow=TRUE), stringsAsFactors=FALSE) - - colnames(mappedInputGene) <- c("userId", "rampc") - unmappedIds <- append(unmappedIds, mappedInputGene[duplicated(mappedInputGene$rampc),]$userId) - mappedInputGene <- mappedInputGene[!duplicated(mappedInputGene$rampc),] - response <- POST(file.path(hostName, "api", "idmapping"), encode="json", - body=list(organism=organism, sourceType="rampc", - targetType="metabolite_name", ids=mappedInputGene$rampc, standardId=standardId) - ) - mapRes <- content(response) - if (mapRes$status == 1) { - stop(webApiError(mapRes)) - } - newMapped <- mapRes$mapped - meta_names <- data.frame(matrix(unlist(lapply(replace_null(newMapped), FUN=function(x) { x[names] })), nrow=length(newMapped), byrow=TRUE), stringsAsFactors=FALSE) - colnames(meta_names) <- c("id", "meta") - } - mappedInputGene$geneSymbol <- mappedInputGene$userId - mappedInputGene$geneName <- meta_names$meta - # mappedInputGene$gLink <- mappedInputGene$userId + if (length(mappedIds) == 0) { + stop(idMappingError("empty")) + } + names <- c("sourceId", "targetId") + mappedInputGene <- data.frame(matrix(unlist(lapply(replace_null(mappedIds), FUN = function(x) { + x[names] + })), nrow = length(mappedIds), byrow = TRUE), stringsAsFactors = FALSE) - # Link - # outlink <- "NONE" - # end <- "" - old_id_type <- sourceIdType - sourceIdType <- tolower(sourceIdType) - if (sourceIdType == "hmdb"){ - mappedInputGene$gLink <- paste0("https://www.hmdb.ca/metabolites/", replace_prefix(mappedInputGene$userId, "hmdb:")) - } else if (sourceIdType == "chebi"){ - mappedInputGene$gLink <- paste0("https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:", replace_prefix(mappedInputGene$userId, "chebi:")) - } else if (sourceIdType == "cas"){ - mappedInputGene$gLink <- paste0("https://commonchemistry.cas.org/detail?cas_rn=", replace_prefix(mappedInputGene$userId, "cas:")) - } else if (sourceIdType == "chemspider"){ - mappedInputGene$gLink <- paste0("https://www.chemspider.com/Chemical-Structure.", replace_prefix(mappedInputGene$userId, "chemspider:"), ".html") - } else if (sourceIdType == "kegg") { - mappedInputGene$gLink <- paste0("https://www.genome.jp/entry/", replace_prefix(mappedInputGene$userId, "kegg:")) - } else if (sourceIdType == "kegg_glycan"){ - mappedInputGene$gLink <- paste0("https://www.genome.jp/entry/", replace_prefix(mappedInputGene$userId, "kegg_glycan:")) - } else if (sourceIdType == "lipidbank"){ - cuts <- replace_prefix(mappedInputGene$userId, "lipidbank:") - mappedInputGene$gLink <- paste0("https://lipidbank.jp/", sapply(cuts, function(x) return(substring(x, 1, 3)))) - } else if (sourceIdType == "lipidmaps"){ - mappedInputGene$gLink <- paste0("https://www.lipidmaps.org/databases/lmsd/", replace_prefix(mappedInputGene$userId, "lipidmaps:")) - } else if (sourceIdType == "plantfa"){ - mappedInputGene$gLink <- paste0("https://plantfadb.org/fatty_acids/", replace_prefix(mappedInputGene$userId, "plantfa:")) - } else if (sourceIdType == "pubchem"){ - mappedInputGene$gLink <- paste0("https://pubchem.ncbi.nlm.nih.gov/compound/", replace_prefix(mappedInputGene$userId, "pubchem:")) - } else if (sourceIdType == "swisslipids"){ - mappedInputGene$gLink <- paste0("http://www.swisslipids.org/#/entity/", replace_prefix(mappedInputGene$userId, "swisslipids:"), "/") - } else if (sourceIdType == "wikidata"){ - mappedInputGene$gLink <- paste0("https://www.wikidata.org/wiki/", replace_prefix(mappedInputGene$userId, "wikidata:")) - } else { - mappedInputGene$gLink <- paste0("URL NOT FOUND FOR TYPE ", sourceIdType) - } - inputGene <- mappedInputGene - # if(dataType=="list"){ - # inputGene <- mappedInputGene - # } + colnames(mappedInputGene) <- c("userId", "rampc") + unmappedIds <- append(unmappedIds, mappedInputGene[duplicated(mappedInputGene$rampc), ]$userId) + mappedInputGene <- mappedInputGene[!duplicated(mappedInputGene$rampc), ] + response <- POST(file.path(hostName, "api", "idmapping"), + encode = "json", + body = list( + organism = organism, sourceType = "rampc", + targetType = "metabolite_name", ids = mappedInputGene$rampc, standardId = standardId + ) + ) + mapRes <- content(response) + if (mapRes$status == 1) { + stop(webApiError(mapRes)) + } + newMapped <- mapRes$mapped + meta_names <- data.frame(matrix(unlist(lapply(replace_null(newMapped), FUN = function(x) { + x[names] + })), nrow = length(newMapped), byrow = TRUE), stringsAsFactors = FALSE) + colnames(meta_names) <- c("id", "meta") + } + mappedInputGene$geneSymbol <- mappedInputGene$userId + mappedInputGene$geneName <- meta_names$meta - # if(dataType=="rnk"){ - # inputGene <- mappedInputGene - # } + # Link - # if(dataType=="gmt"){ - # inputGene <- mappedInputGene - # } + old_id_type <- sourceIdType + sourceIdType <- tolower(sourceIdType) + if (sourceIdType == "hmdb") { + mappedInputGene$gLink <- paste0("https://www.hmdb.ca/metabolites/", replace_prefix(mappedInputGene$userId, "hmdb:")) + } else if (sourceIdType == "chebi") { + mappedInputGene$gLink <- paste0("https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:", replace_prefix(mappedInputGene$userId, "chebi:")) + } else if (sourceIdType == "cas") { + mappedInputGene$gLink <- paste0("https://commonchemistry.cas.org/detail?cas_rn=", replace_prefix(mappedInputGene$userId, "cas:")) + } else if (sourceIdType == "chemspider") { + mappedInputGene$gLink <- paste0("https://www.chemspider.com/Chemical-Structure.", replace_prefix(mappedInputGene$userId, "chemspider:"), ".html") + } else if (sourceIdType == "kegg") { + mappedInputGene$gLink <- paste0("https://www.genome.jp/entry/", replace_prefix(mappedInputGene$userId, "kegg:")) + } else if (sourceIdType == "kegg_glycan") { + mappedInputGene$gLink <- paste0("https://www.genome.jp/entry/", replace_prefix(mappedInputGene$userId, "kegg_glycan:")) + } else if (sourceIdType == "lipidbank") { + cuts <- replace_prefix(mappedInputGene$userId, "lipidbank:") + mappedInputGene$gLink <- paste0("https://lipidbank.jp/", sapply(cuts, function(x) { + return(substring(x, 1, 3)) + })) + } else if (sourceIdType == "lipidmaps") { + mappedInputGene$gLink <- paste0("https://www.lipidmaps.org/databases/lmsd/", replace_prefix(mappedInputGene$userId, "lipidmaps:")) + } else if (sourceIdType == "plantfa") { + mappedInputGene$gLink <- paste0("https://plantfadb.org/fatty_acids/", replace_prefix(mappedInputGene$userId, "plantfa:")) + } else if (sourceIdType == "pubchem") { + mappedInputGene$gLink <- paste0("https://pubchem.ncbi.nlm.nih.gov/compound/", replace_prefix(mappedInputGene$userId, "pubchem:")) + } else if (sourceIdType == "swisslipids") { + mappedInputGene$gLink <- paste0("http://www.swisslipids.org/#/entity/", replace_prefix(mappedInputGene$userId, "swisslipids:"), "/") + } else if (sourceIdType == "wikidata") { + mappedInputGene$gLink <- paste0("https://www.wikidata.org/wiki/", replace_prefix(mappedInputGene$userId, "wikidata:")) + } else { + mappedInputGene$gLink <- paste0("URL NOT FOUND FOR TYPE ", sourceIdType) + } + inputGene <- mappedInputGene - #############Output####################### - if (mappingOutput) { - idMappingOutput(outputFileName, inputGene, unmappedIds, dataType, old_id_type, targetIdType=targetIdType) - } - r <- list(mapped=inputGene,unmapped=unmappedIds) - return(r) + ############# Output####################### + if (mappingOutput) { + idMappingOutput(outputFileName, inputGene, unmappedIds, dataType, old_id_type, targetIdType = targetIdType) + } + r <- list(mapped = inputGene, unmapped = unmappedIds) + return(r) } -replace_prefix <- function(x, prefix){ - return(sapply(x, function(y) return(sub(prefix, "", y, ignore.case = TRUE)))) +replace_prefix <- function(x, prefix) { + return(sapply(x, function(y) { + return(sub(prefix, "", y, ignore.case = TRUE)) + })) } -.combineG <- function(e){ - e <- e[-length(e)] - e <- paste(e,collapse="_") - return(e) +.combineG <- function(e) { + e <- e[-length(e)] + e <- paste(e, collapse = "_") + return(e) } diff --git a/inst/templates/enrichResultSection.mustache b/inst/templates/enrichResultSection.mustache index 7815ec6..32b925d 100644 --- a/inst/templates/enrichResultSection.mustache +++ b/inst/templates/enrichResultSection.mustache @@ -1,20 +1,20 @@

Enrichment Results

- + Redundancy reduction: All {{#hasAp}} - + Affinity propagation {{/hasAp}} {{#hasWsc}} - + Weighted set cover From afbe812b9a5592f759d6ed647ec335a03fa29418 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 24 Apr 2023 12:37:35 -0500 Subject: [PATCH 05/82] adds working version of k-medoid --- .gitignore | 37 +++ .lintr | 6 + NAMESPACE | 2 + R/WebGestaltROra.R | 299 ++++++++++---------- R/kMedoid.R | 43 +++ inst/templates/enrichResultSection.mustache | 6 +- install | 6 +- 7 files changed, 245 insertions(+), 154 deletions(-) create mode 100644 .lintr create mode 100644 R/kMedoid.R diff --git a/.gitignore b/.gitignore index 67ddd03..085035c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,41 @@ .Rproj.user .DS_Store .Rhistory +.Rapp.history + +# Session Data files +.RData + +# User-specific files +.Ruserdata + +# Example code in package build process +*-Ex.R + +# Output files from R CMD build +/*.tar.gz + +# Output files from R CMD check +/*.Rcheck/ + +# RStudio files +.Rproj.user/ + +# produced vignettes +vignettes/*.html +vignettes/*.pdf + +# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 +.httr-oauth + +# knitr and R markdown default cache directories +*_cache/ +/cache/ + +# Temporary files created by R markdown +*.utf8.md +*.knit.md + +# R Environment Variables +.Renviron .vscode diff --git a/.lintr b/.lintr new file mode 100644 index 0000000..05abbd4 --- /dev/null +++ b/.lintr @@ -0,0 +1,6 @@ +linters: linters_with_defaults( + line_length_linter(120), + commented_code_linter = NULL, + no_tab_linter = NULL + ) + diff --git a/NAMESPACE b/NAMESPACE index d48d19a..70331e9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(IDMapping) export(WebGestaltR) export(WebGestaltRBatch) export(WebGestaltR_batch) +export(kMedoid) export(affinityPropagation) export(formatCheck) export(goSlimSummary) @@ -29,6 +30,7 @@ import(methods) import(utils) importFrom(Rcpp,sourceCpp) importFrom(apcluster,apcluster) +importFrom(cluster, pam) importFrom(doParallel,registerDoParallel) importFrom(doRNG,"%dorng%") importFrom(dplyr,"%>%") diff --git a/R/WebGestaltROra.R b/R/WebGestaltROra.R index c1737a2..3cca5fb 100644 --- a/R/WebGestaltROra.R +++ b/R/WebGestaltROra.R @@ -1,152 +1,153 @@ #' @importFrom readr write_tsv #' @importFrom dplyr left_join select arrange %>% desc mutate -WebGestaltROra <- function(organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, interestGeneFile=NULL, interestGene=NULL, interestGeneType=NULL, collapseMethod="mean", referenceGeneFile=NULL, referenceGene=NULL, referenceGeneType=NULL, referenceSet=NULL, minNum=10, maxNum=500, fdrMethod="BH", sigMethod="fdr", fdrThr=0.05, topThr=10, reportNum=20, setCoverNum=10, isOutput=TRUE, outputDirectory=getwd(), projectName=NULL, dagColor="binary", nThreads=1, cache=NULL, hostName="https://www.webgestalt.org/") { - enrichMethod <- "ORA" - projectDir <- file.path(outputDirectory, paste0("Project_", projectName)) - - ######### Web server will input "NULL" to the R package, thus, we need to change "NULL" to NULL ######## - enrichDatabase <- testNull(enrichDatabase) - enrichDatabaseFile <- testNull(enrichDatabaseFile) - enrichDatabaseType <- testNull(enrichDatabaseType) - enrichDatabaseDescriptionFile <- testNull(enrichDatabaseDescriptionFile) - interestGeneFile <- testNull(interestGeneFile) - interestGene <- testNull(interestGene) - interestGeneType <- testNull(interestGeneType) - referenceGeneFile <- testNull(referenceGeneFile) - referenceGene <- testNull(referenceGene) - referenceGeneType <- testNull(referenceGeneType) - referenceSet <- testNull(referenceSet) - - ################ Check parameter ################ - errorTest <- parameterErrorMessage(enrichMethod=enrichMethod, organism=organism, collapseMethod=collapseMethod, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, isOutput=isOutput, outputDirectory=outputDirectory, dagColor=dagColor, hostName=hostName, cache=cache) - - if (!is.null(errorTest)) { - stop(errorTest) - } - - ############# Check enriched database ############# - cat("Loading the functional categories...\n") - enrichD <- loadGeneSet(organism=organism, enrichDatabase=enrichDatabase, enrichDatabaseFile=enrichDatabaseFile, enrichDatabaseType=enrichDatabaseType, enrichDatabaseDescriptionFile=enrichDatabaseDescriptionFile, cache=cache, hostName=hostName) - - geneSet <- enrichD$geneSet - geneSetDes <- enrichD$geneSetDes - geneSetDag <- enrichD$geneSetDag - geneSetNet <- enrichD$geneSetNet - databaseStandardId <- enrichD$standardId - rm(enrichD) - - ########### Check input interesting gene list ############### - cat("Loading the ID list...\n") - interestingGeneMap <- loadInterestGene(organism=organism, dataType="list", inputGeneFile=interestGeneFile, inputGene=interestGene, geneType=interestGeneType, collapseMethod=collapseMethod, cache=cache, hostName=hostName, geneSet=geneSet) - - if (organism == "others") { - interestGeneList <- unique(interestingGeneMap) - } else { - interestStandardId <- interestingGeneMap$standardId - interestGeneList <- unique(interestingGeneMap$mapped[[interestStandardId]]) - } - - ################### Load reference gene set ############## - cat("Loading the reference list...\n") - referenceGeneList <- loadReferenceGene(organism=organism, referenceGeneFile=referenceGeneFile, referenceGene=referenceGene, referenceGeneType=referenceGeneType, referenceSet=referenceSet, collapseMethod=collapseMethod, hostName=hostName, geneSet=geneSet, interestGeneList=interestGeneList, cache=cache) - - ########## Create project folder ############## - if (isOutput) { - dir.create(projectDir) - - ###### Summarize gene annotation based on the GOSlim ########### - if (organism != "others") { - if (databaseStandardId == "entrezgene") { - cat("Summarizing the input ID list by GO Slim data...\n") - goSlimOutput <- file.path(projectDir, paste0("goslim_summary_", projectName)) - re <- goSlimSummary(organism=organism, geneList=interestGeneList, outputFile=goSlimOutput, outputType="png", isOutput=isOutput, cache=cache, hostName=hostName) - } - write_tsv(interestingGeneMap$mapped, file.path(projectDir, paste0("interestingID_mappingTable_", projectName , ".txt"))) - write(interestingGeneMap$unmapped, file.path(projectDir, paste0("interestingID_unmappedList_", projectName, ".txt"))) - } else { - write(interestGeneList, file.path(projectDir, paste0("interestList_", projectName, ".txt"))) - } - } - - ############# Run enrichment analysis ################### - cat("Performing the enrichment analysis...\n") - oraRes <- oraEnrichment(interestGeneList, referenceGeneList, geneSet, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr) - if (is.null(oraRes)) { - return(NULL) - } - enrichedSig <- oraRes$enriched - insig <- oraRes$background - - clusters <- list() - geneTables <- list() - - if (!is.null(enrichedSig)) { - if (!is.null(geneSetDes)) { ####### Add extra description information ########### - enrichedSig <- enrichedSig %>% - left_join(geneSetDes, by="geneSet") %>% - select(.data$geneSet, .data$description, .data$link, .data$size, .data$overlap, .data$expect, .data$enrichmentRatio, .data$pValue, .data$FDR, .data$overlapId) %>% - arrange(.data$FDR, .data$pValue, desc(.data$size)) %>% - mutate(description=ifelse(is.na(.data$description), "", .data$description)) # now des could be mixture - } else { - enrichedSig <- enrichedSig %>% - select(.data$geneSet, .data$link, .data$size, .data$overlap, .data$expect, .data$enrichmentRatio, .data$pValue, .data$FDR, .data$overlapId) %>% - arrange(.data$FDR, .data$pValue, desc(.data$size)) - } - - geneTables <- getGeneTables(organism, enrichedSig, "overlapId", interestingGeneMap) - if (organism != "others") { - enrichedSig$link <- mapply(function(link, geneList) linkModification("ORA", link, geneList, interestingGeneMap), - enrichedSig$link, - enrichedSig$overlapId - ) - } - - if ("database" %in% colnames(geneSet)) { - # Add source database for multiple databases - enrichedSig <- enrichedSig %>% left_join(unique(geneSet[, c("geneSet", "database")]), by="geneSet") - } - - if (organism != "others" && interestGeneType != interestStandardId) { - outputEnrichedSig <- mapUserId(enrichedSig, "overlapId", interestingGeneMap) - } else { - outputEnrichedSig <- enrichedSig - } - - if (isOutput) { - write_tsv(outputEnrichedSig, file.path(projectDir, paste0("enrichment_results_", projectName, ".txt"))) - idsInSet <- sapply(enrichedSig$overlapId, strsplit, split=";") - names(idsInSet) <- enrichedSig$geneSet - minusLogP <- -log(enrichedSig$pValue) - minusLogP[minusLogP == Inf] <- -log(.Machine$double.eps) - apRes <- affinityPropagation(idsInSet, minusLogP) - wscRes <- weightedSetCover(idsInSet, 1 / minusLogP, setCoverNum, nThreads) - if (!is.null(apRes)) { - writeLines(sapply(apRes$clusters, paste, collapse="\t"), file.path(projectDir, paste0("enriched_geneset_ap_clusters_", projectName, ".txt"))) - } else { - apRes <- NULL - } - clusters$ap <- apRes - if (!is.null(wscRes$topSets)) { - writeLines(c(paste0("# Coverage: ", wscRes$coverage), wscRes$topSets), file.path(projectDir, paste0("enriched_geneset_wsc_topsets_", projectName, ".txt"))) - clusters$wsc <- list(representatives=wscRes$topSets, coverage=wscRes$coverage) - } else { - clusters$wsc <- NULL - } - } - } - - if (isOutput) { - ############## Create report ################## - cat("Generate the final report...\n") - createReport(hostName=hostName, outputDirectory=outputDirectory, organism=organism, projectName=projectName, enrichMethod=enrichMethod, geneSet=geneSet, geneSetDes=geneSetDes, geneSetDag=geneSetDag, geneSetNet=geneSetNet, interestingGeneMap=interestingGeneMap, referenceGeneList=referenceGeneList, enrichedSig=enrichedSig, background=insig, geneTables=geneTables, clusters=clusters, enrichDatabase=enrichDatabase, enrichDatabaseFile=enrichDatabaseFile, enrichDatabaseType=enrichDatabaseType, enrichDatabaseDescriptionFile=enrichDatabaseDescriptionFile, interestGeneFile=interestGeneFile, interestGene=interestGene, interestGeneType=interestGeneType, collapseMethod=collapseMethod, referenceGeneFile=referenceGeneFile, referenceGene=referenceGene, referenceGeneType=referenceGeneType, referenceSet=referenceSet, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, dagColor=dagColor) - - cwd <- getwd() - setwd(projectDir) - zip(paste0("Project_", projectName, ".zip"), ".", flags="-rq") - setwd(cwd) - - cat("Results can be found in the ", projectDir, "!\n", sep="") - } - - return(outputEnrichedSig) +WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, collapseMethod = "mean", referenceGeneFile = NULL, referenceGene = NULL, referenceGeneType = NULL, referenceSet = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10, reportNum = 20, setCoverNum = 10, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "binary", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/") { + enrichMethod <- "ORA" + projectDir <- file.path(outputDirectory, paste0("Project_", projectName)) + + ######### Web server will input "NULL" to the R package, thus, we need to change "NULL" to NULL ######## + enrichDatabase <- testNull(enrichDatabase) + enrichDatabaseFile <- testNull(enrichDatabaseFile) + enrichDatabaseType <- testNull(enrichDatabaseType) + enrichDatabaseDescriptionFile <- testNull(enrichDatabaseDescriptionFile) + interestGeneFile <- testNull(interestGeneFile) + interestGene <- testNull(interestGene) + interestGeneType <- testNull(interestGeneType) + referenceGeneFile <- testNull(referenceGeneFile) + referenceGene <- testNull(referenceGene) + referenceGeneType <- testNull(referenceGeneType) + referenceSet <- testNull(referenceSet) + + ################ Check parameter ################ + errorTest <- parameterErrorMessage(enrichMethod = enrichMethod, organism = organism, collapseMethod = collapseMethod, minNum = minNum, maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr, reportNum = reportNum, isOutput = isOutput, outputDirectory = outputDirectory, dagColor = dagColor, hostName = hostName, cache = cache) + + if (!is.null(errorTest)) { + stop(errorTest) + } + + ############# Check enriched database ############# + cat("Loading the functional categories...\n") + enrichD <- loadGeneSet(organism = organism, enrichDatabase = enrichDatabase, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, cache = cache, hostName = hostName) + + geneSet <- enrichD$geneSet + geneSetDes <- enrichD$geneSetDes + geneSetDag <- enrichD$geneSetDag + geneSetNet <- enrichD$geneSetNet + databaseStandardId <- enrichD$standardId + rm(enrichD) + + ########### Check input interesting gene list ############### + cat("Loading the ID list...\n") + interestingGeneMap <- loadInterestGene(organism = organism, dataType = "list", inputGeneFile = interestGeneFile, inputGene = interestGene, geneType = interestGeneType, collapseMethod = collapseMethod, cache = cache, hostName = hostName, geneSet = geneSet) + + if (organism == "others") { + interestGeneList <- unique(interestingGeneMap) + } else { + interestStandardId <- interestingGeneMap$standardId + interestGeneList <- unique(interestingGeneMap$mapped[[interestStandardId]]) + } + + ################### Load reference gene set ############## + cat("Loading the reference list...\n") + referenceGeneList <- loadReferenceGene(organism = organism, referenceGeneFile = referenceGeneFile, referenceGene = referenceGene, referenceGeneType = referenceGeneType, referenceSet = referenceSet, collapseMethod = collapseMethod, hostName = hostName, geneSet = geneSet, interestGeneList = interestGeneList, cache = cache) + + ########## Create project folder ############## + if (isOutput) { + dir.create(projectDir) + + ###### Summarize gene annotation based on the GOSlim ########### + if (organism != "others") { + if (databaseStandardId == "entrezgene") { + cat("Summarizing the input ID list by GO Slim data...\n") + goSlimOutput <- file.path(projectDir, paste0("goslim_summary_", projectName)) + re <- goSlimSummary(organism = organism, geneList = interestGeneList, outputFile = goSlimOutput, outputType = "png", isOutput = isOutput, cache = cache, hostName = hostName) + } + write_tsv(interestingGeneMap$mapped, file.path(projectDir, paste0("interestingID_mappingTable_", projectName, ".txt"))) + write(interestingGeneMap$unmapped, file.path(projectDir, paste0("interestingID_unmappedList_", projectName, ".txt"))) + } else { + write(interestGeneList, file.path(projectDir, paste0("interestList_", projectName, ".txt"))) + } + } + + ############# Run enrichment analysis ################### + cat("Performing the enrichment analysis...\n") + oraRes <- oraEnrichment(interestGeneList, referenceGeneList, geneSet, minNum = minNum, maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr) + if (is.null(oraRes)) { + return(NULL) + } + enrichedSig <- oraRes$enriched + insig <- oraRes$background + + clusters <- list() + geneTables <- list() + + if (!is.null(enrichedSig)) { + if (!is.null(geneSetDes)) { ####### Add extra description information ########### + enrichedSig <- enrichedSig %>% + left_join(geneSetDes, by = "geneSet") %>% + select(.data$geneSet, .data$description, .data$link, .data$size, .data$overlap, .data$expect, .data$enrichmentRatio, .data$pValue, .data$FDR, .data$overlapId) %>% + arrange(.data$FDR, .data$pValue, desc(.data$size)) %>% + mutate(description = ifelse(is.na(.data$description), "", .data$description)) # now des could be mixture + } else { + enrichedSig <- enrichedSig %>% + select(.data$geneSet, .data$link, .data$size, .data$overlap, .data$expect, .data$enrichmentRatio, .data$pValue, .data$FDR, .data$overlapId) %>% + arrange(.data$FDR, .data$pValue, desc(.data$size)) + } + + geneTables <- getGeneTables(organism, enrichedSig, "overlapId", interestingGeneMap) + if (organism != "others") { + enrichedSig$link <- mapply( + function(link, geneList) linkModification("ORA", link, geneList, interestingGeneMap), + enrichedSig$link, + enrichedSig$overlapId + ) + } + + if ("database" %in% colnames(geneSet)) { + # Add source database for multiple databases + enrichedSig <- enrichedSig %>% left_join(unique(geneSet[, c("geneSet", "database")]), by = "geneSet") + } + + if (organism != "others" && interestGeneType != interestStandardId) { + outputEnrichedSig <- mapUserId(enrichedSig, "overlapId", interestingGeneMap) + } else { + outputEnrichedSig <- enrichedSig + } + + if (isOutput) { + write_tsv(outputEnrichedSig, file.path(projectDir, paste0("enrichment_results_", projectName, ".txt"))) + idsInSet <- sapply(enrichedSig$overlapId, strsplit, split = ";") + names(idsInSet) <- enrichedSig$geneSet + minusLogP <- -log(enrichedSig$pValue) + minusLogP[minusLogP == Inf] <- -log(.Machine$double.eps) + apRes <- kMedoid(idsInSet, minusLogP) + wscRes <- weightedSetCover(idsInSet, 1 / minusLogP, setCoverNum, nThreads) + if (!is.null(apRes)) { + writeLines(sapply(apRes$clusters, paste, collapse = "\t"), file.path(projectDir, paste0("enriched_geneset_ap_clusters_", projectName, ".txt"))) + } else { + apRes <- NULL + } + clusters$ap <- apRes + if (!is.null(wscRes$topSets)) { + writeLines(c(paste0("# Coverage: ", wscRes$coverage), wscRes$topSets), file.path(projectDir, paste0("enriched_geneset_wsc_topsets_", projectName, ".txt"))) + clusters$wsc <- list(representatives = wscRes$topSets, coverage = wscRes$coverage) + } else { + clusters$wsc <- NULL + } + } + } + + if (isOutput) { + ############## Create report ################## + cat("Generate the final report...\n") + createReport(hostName = hostName, outputDirectory = outputDirectory, organism = organism, projectName = projectName, enrichMethod = enrichMethod, geneSet = geneSet, geneSetDes = geneSetDes, geneSetDag = geneSetDag, geneSetNet = geneSetNet, interestingGeneMap = interestingGeneMap, referenceGeneList = referenceGeneList, enrichedSig = enrichedSig, background = insig, geneTables = geneTables, clusters = clusters, enrichDatabase = enrichDatabase, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, interestGeneFile = interestGeneFile, interestGene = interestGene, interestGeneType = interestGeneType, collapseMethod = collapseMethod, referenceGeneFile = referenceGeneFile, referenceGene = referenceGene, referenceGeneType = referenceGeneType, referenceSet = referenceSet, minNum = minNum, maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr, reportNum = reportNum, dagColor = dagColor) + + cwd <- getwd() + setwd(projectDir) + zip(paste0("Project_", projectName, ".zip"), ".", flags = "-rq") + setwd(cwd) + + cat("Results can be found in the ", projectDir, "!\n", sep = "") + } + + return(outputEnrichedSig) } diff --git a/R/kMedoid.R b/R/kMedoid.R new file mode 100644 index 0000000..3e54ed8 --- /dev/null +++ b/R/kMedoid.R @@ -0,0 +1,43 @@ +kMedoid <- function(idsInSet, score){ + # first find out the union of sets, sorted + all.genes <- sort(unique(unlist(idsInSet))) + overlap.mat <- sapply(idsInSet, function(x) {as.integer(all.genes %in% x)}) + + num <- length(idsInSet) + sim.mat <- matrix(1, num, num) + colnames(sim.mat) <- colnames(overlap.mat) + + if (num == 1) { + return(list(sim.mat=sim.mat, ip.vec=c(1))) + } + + for (i in 1:(num-1)) { + for (j in (i+1):num) { + jaccardIndex <- sum(bitwAnd(overlap.mat[, i], overlap.mat[, j])) / sum(bitwOr(overlap.mat[, i], overlap.mat[, j])) + sim.mat[i, j] <- jaccardIndex + sim.mat[j, i] <- jaccardIndex + } + } + # if there is no overlap, set the similarity to -Inf + for (i in 1:(num-1)) { + for (j in (i+1):num) { + if (sum(bitwOr(overlap.mat[, i], overlap.mat[, j])) == 0) { + sim.mat[i, j] <- -Inf + sim.mat[j, i] <- -Inf + } + } + } + # compute the k-medoid clustering + kmRes <- pam(sim.mat, 10, diss=TRUE) # TODO: Make parameter for number of clusters. Currently set to 10. + + #sort clusters to make exemplar the first member + clusters <- vector(mode="list", length(kmRes$medoids)) + if(length(kmRes$medoids) == 0){ + return(NULL) + } + for (i in 1:length(clusters)) { + clusters[[i]] <- kmRes$clustering[[i]][order(kmRes$clustering[[i]] == i, decreasing=TRUE)] + } + # print(kmRes$medoids) + return(list(clusters=sapply(clusters, names), representatives=kmRes$medoids)) +} diff --git a/inst/templates/enrichResultSection.mustache b/inst/templates/enrichResultSection.mustache index 32b925d..d2a1162 100644 --- a/inst/templates/enrichResultSection.mustache +++ b/inst/templates/enrichResultSection.mustache @@ -4,11 +4,11 @@ Redundancy reduction: - All + None {{#hasAp}} - - Affinity propagation + + k-Medoid {{/hasAp}} diff --git a/install b/install index 58d69bb..111afa3 100755 --- a/install +++ b/install @@ -1,7 +1,9 @@ #!/usr/bin/env bash R -e "library(utils) -library(devtools) remove.packages('WebGestaltR') +q()" +echo "Old version removed" +R -e "library(devtools) install('.') q()" -echo "New version installed" +echo "New version installed" \ No newline at end of file From 5b6538fe5513b59e91d2ec37ae35487a340cf1d7 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 24 Apr 2023 12:45:55 -0500 Subject: [PATCH 06/82] set default k to 5 to fit examples --- R/kMedoid.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/kMedoid.R b/R/kMedoid.R index 3e54ed8..3f42813 100644 --- a/R/kMedoid.R +++ b/R/kMedoid.R @@ -28,7 +28,7 @@ kMedoid <- function(idsInSet, score){ } } # compute the k-medoid clustering - kmRes <- pam(sim.mat, 10, diss=TRUE) # TODO: Make parameter for number of clusters. Currently set to 10. + kmRes <- pam(sim.mat, 5, diss=TRUE) # TODO: Make parameter for number of clusters. Currently set to 5. #sort clusters to make exemplar the first member clusters <- vector(mode="list", length(kmRes$medoids)) From a22c5f4e51a1441df3445cc3a76608176091307f Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 25 Apr 2023 13:44:50 -0500 Subject: [PATCH 07/82] faster jaccard --- R/kMedoid.R | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/R/kMedoid.R b/R/kMedoid.R index 3f42813..e05ca22 100644 --- a/R/kMedoid.R +++ b/R/kMedoid.R @@ -13,22 +13,32 @@ kMedoid <- function(idsInSet, score){ for (i in 1:(num-1)) { for (j in (i+1):num) { - jaccardIndex <- sum(bitwAnd(overlap.mat[, i], overlap.mat[, j])) / sum(bitwOr(overlap.mat[, i], overlap.mat[, j])) - sim.mat[i, j] <- jaccardIndex - sim.mat[j, i] <- jaccardIndex - } - } - # if there is no overlap, set the similarity to -Inf - for (i in 1:(num-1)) { - for (j in (i+1):num) { - if (sum(bitwOr(overlap.mat[, i], overlap.mat[, j])) == 0) { + x <- sum(bitwOr(overlap.mat[, i], overlap.mat[, j])) + if (x == 0) { # if there is no overlap, set the similarity to -Inf sim.mat[i, j] <- -Inf sim.mat[j, i] <- -Inf + } else { + jaccardIndex <- sum(bitwAnd(overlap.mat[, i], overlap.mat[, j])) / x + sim.mat[i, j] <- jaccardIndex + sim.mat[j, i] <- jaccardIndex } } } + + + if (max(sim.mat) == min(sim.mat)) { + # this will generate error, so randomy add some noise to off diagonal elements + mat.siz <- dim(sim.mat)[1] + rand.m <- matrix(rnorm(mat.siz*mat.siz,0,0.01),mat.siz) + # make it symmetric + rand.m[lower.tri(rand.m)] = t(rand.m)[lower.tri(rand.m)] + sim.mat <- sim.mat + rand.m + # make diagonal all 1 + diag(sim.mat) <- 1 + } + # compute the k-medoid clustering - kmRes <- pam(sim.mat, 5, diss=TRUE) # TODO: Make parameter for number of clusters. Currently set to 5. + kmRes <- pam(sim.mat, 5, diss=TRUE, variant = "faster") # TODO: Make parameter for number of clusters. Currently set to 5. #sort clusters to make exemplar the first member clusters <- vector(mode="list", length(kmRes$medoids)) From 9d5a8cfa7faaa39c070a490e329ad32f10f34526 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Wed, 26 Apr 2023 11:42:17 -0500 Subject: [PATCH 08/82] Working KEGG link. need to remove redundant results --- R/WebGestaltROra.R | 2 +- R/linkModification.R | 120 ++++++++++++++++++++++++++++++------------- 2 files changed, 85 insertions(+), 37 deletions(-) diff --git a/R/WebGestaltROra.R b/R/WebGestaltROra.R index 3cca5fb..ca97715 100644 --- a/R/WebGestaltROra.R +++ b/R/WebGestaltROra.R @@ -96,7 +96,7 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD geneTables <- getGeneTables(organism, enrichedSig, "overlapId", interestingGeneMap) if (organism != "others") { enrichedSig$link <- mapply( - function(link, geneList) linkModification("ORA", link, geneList, interestingGeneMap), + function(link, geneList) linkModification("ORA", link, geneList, interestingGeneMap, hostName), enrichedSig$link, enrichedSig$overlapId ) diff --git a/R/linkModification.R b/R/linkModification.R index 7782104..c6712f8 100644 --- a/R/linkModification.R +++ b/R/linkModification.R @@ -3,46 +3,94 @@ #' Currently, we only have wikipathway and kegg pathways that need to modify the link #' #' @keywords internal -linkModification <- function(enrichMethod, enrichPathwayLink, geneList, interestingGeneMap) { +linkModification <- function(enrichMethod, enrichPathwayLink, geneList, interestingGeneMap, hostName = "https://www.webgestalt.org/") { + print("link modification") + print(enrichPathwayLink) + print(interestingGeneMap$standardId) + if (grepl("www.kegg.jp", enrichPathwayLink, fixed = TRUE) && interestingGeneMap$standardId == "rampc") { + print("kegg metabolite link modified") + link <- keggMetaboliteLinkModification(enrichPathwayLink, geneList, interestingGeneMap, hostName) + return(link) + } else if (grepl("www.kegg.jp", enrichPathwayLink, fixed = TRUE)) { + link <- keggLinkModification(enrichPathwayLink, geneList) + return(link) + } else if (grepl("www.wikipathways.org", enrichPathwayLink, fixed = TRUE)) { + link <- wikiLinkModification(enrichMethod, enrichPathwayLink, geneList, interestingGeneMap) + return(link) + } + return(enrichPathwayLink) +} - if (grepl("www.kegg.jp", enrichPathwayLink, fixed=TRUE)) { - link <- keggLinkModification(enrichPathwayLink, geneList) - return(link) - } - if (grepl("www.wikipathways.org", enrichPathwayLink, fixed=TRUE)) { - link <- wikiLinkModification(enrichMethod, enrichPathwayLink, geneList, interestingGeneMap) - return(link) - } - return(enrichPathwayLink) +keggLinkModification <- function(enrichPathwayLink, geneList) { + geneList <- gsub(";", "+", geneList) + enrichPathwayLink <- paste(enrichPathwayLink, "+", geneList, sep = "") + return(enrichPathwayLink) } -keggLinkModification <- function(enrichPathwayLink,geneList){ - geneList <- gsub(";","+",geneList) - enrichPathwayLink <- paste(enrichPathwayLink,"+",geneList,sep="") - return(enrichPathwayLink) +keggMetaboliteLinkModification <- function(enrichPathwayLink, geneList, interestingGeneMap, hostName) { + print(geneList) + geneList <- simple_mapping(unlist(strsplit(geneList, ";")), "hsapiens", "rampc", "kegg", "rampc", hostName) + geneList <- sapply(geneList, function(x) x <- gsub("kegg:", "", x, ignore.case = TRUE)) + print("==========") + geneList <- paste(geneList, collapse = "+") + print(geneList) + + enrichPathwayLink <- paste(enrichPathwayLink, "+", geneList, sep = "") + return(enrichPathwayLink) } wikiLinkModification <- function(enrichMethod, enrichPathwayLink, geneList, interestingGeneMap) { - geneMap <- interestingGeneMap$mapped - geneList <- unlist(strsplit(geneList,";")) - geneMap <- filter(geneMap, .data$entrezgene %in% geneList) - enrichPathwayLink <- paste0(enrichPathwayLink, - paste0(sapply(geneMap$geneSymbol, function(x) paste0("&label[]=", x)), collapse="") - #not many pathway have entrezgene xref. Using both also seem to interfere with coloring - #paste0(sapply(geneMap$entrezgene, function(x) paste0("&xref[]=", x, ",Entrez Gene")), collapse="") - ) - if (enrichMethod == "ORA") { - enrichPathwayLink <- paste0(enrichPathwayLink, "&colors=", colorPos) - } else if (enrichMethod == "GSEA") { - scores <- filter(interestingGeneMap$mapped, .data$entrezgene %in% geneList)[["score"]] - maxScore <- max(scores) - minScore <- min(scores) - tmp <- getPaletteForGsea(maxScore, minScore) - palette <- tmp[[1]] - breaks <- tmp[[2]] - colors <- sapply(scores, function(s) palette[max(which(breaks <= s))]) - colorStr <- paste(gsub("#", "%23", colors, fixed=TRUE), collapse=",") - enrichPathwayLink <- paste0(enrichPathwayLink, "&colors=", colorStr) - } - return(enrichPathwayLink) + geneMap <- interestingGeneMap$mapped + geneList <- unlist(strsplit(geneList, ";")) + geneMap <- filter(geneMap, .data$entrezgene %in% geneList) + enrichPathwayLink <- paste0( + enrichPathwayLink, + paste0(sapply(geneMap$geneSymbol, function(x) paste0("&label[]=", x)), collapse = "") + # not many pathway have entrezgene xref. Using both also seem to interfere with coloring + # paste0(sapply(geneMap$entrezgene, function(x) paste0("&xref[]=", x, ",Entrez Gene")), collapse="") + ) + if (enrichMethod == "ORA") { + enrichPathwayLink <- paste0(enrichPathwayLink, "&colors=", colorPos) + } else if (enrichMethod == "GSEA") { + scores <- filter(interestingGeneMap$mapped, .data$entrezgene %in% geneList)[["score"]] + maxScore <- max(scores) + minScore <- min(scores) + tmp <- getPaletteForGsea(maxScore, minScore) + palette <- tmp[[1]] + breaks <- tmp[[2]] + colors <- sapply(scores, function(s) palette[max(which(breaks <= s))]) + colorStr <- paste(gsub("#", "%23", colors, fixed = TRUE), collapse = ",") + enrichPathwayLink <- paste0(enrichPathwayLink, "&colors=", colorStr) + } + return(enrichPathwayLink) +} + + +simple_mapping <- function(id_list, organism, source_id, target_id, standard_id, hostName) { + response <- POST(file.path(hostName, "api", "idmapping"), + encode = "json", + body = list( + organism = organism, sourceType = source_id, + targetType = target_id, ids = id_list, standardId = standard_id + ) + ) + if (response$status_code != 200) { + stop(webRequestError(response)) + } + mapRes <- content(response) + # if (mapRes$status == 1) { + # stop(webApiError(mapRes)) + # } + mappedIds <- mapRes$mapped + unmappedIds <- unlist(mapRes$unmapped) + # if (length(mappedIds) == 0) { + # stop(idMappingError("empty")) + # } + names <- c("sourceId", "targetId") + mappedInputGene <- data.frame(matrix(unlist(lapply(replace_null(mappedIds), FUN = function(x) { + x[names] + })), nrow = length(mappedIds), byrow = TRUE), stringsAsFactors = FALSE) + + colnames(mappedInputGene) <- c("sourceId", "targetId") + return(mappedInputGene$targetId) } From 0633bbb02ee43b720a749e3bb92a4ddb84b7c1d2 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Wed, 26 Apr 2023 14:24:43 -0500 Subject: [PATCH 09/82] Adds working wikidata links --- R/linkModification.R | 53 +++++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/R/linkModification.R b/R/linkModification.R index c6712f8..9342fe1 100644 --- a/R/linkModification.R +++ b/R/linkModification.R @@ -11,7 +11,11 @@ linkModification <- function(enrichMethod, enrichPathwayLink, geneList, interest print("kegg metabolite link modified") link <- keggMetaboliteLinkModification(enrichPathwayLink, geneList, interestingGeneMap, hostName) return(link) - } else if (grepl("www.kegg.jp", enrichPathwayLink, fixed = TRUE)) { + } else if (grepl("www.wikipathways.org", enrichPathwayLink, fixed = TRUE) && interestingGeneMap$standardId == "rampc") { + link <- wikiMetaboliteLinkModification(enrichMethod, enrichPathwayLink, geneList, interestingGeneMap, hostName) + return(link) + } + else if (grepl("www.kegg.jp", enrichPathwayLink, fixed = TRUE)) { link <- keggLinkModification(enrichPathwayLink, geneList) return(link) } else if (grepl("www.wikipathways.org", enrichPathwayLink, fixed = TRUE)) { @@ -28,17 +32,40 @@ keggLinkModification <- function(enrichPathwayLink, geneList) { } keggMetaboliteLinkModification <- function(enrichPathwayLink, geneList, interestingGeneMap, hostName) { - print(geneList) geneList <- simple_mapping(unlist(strsplit(geneList, ";")), "hsapiens", "rampc", "kegg", "rampc", hostName) geneList <- sapply(geneList, function(x) x <- gsub("kegg:", "", x, ignore.case = TRUE)) - print("==========") geneList <- paste(geneList, collapse = "+") - print(geneList) - enrichPathwayLink <- paste(enrichPathwayLink, "+", geneList, sep = "") return(enrichPathwayLink) } +wikiMetaboliteLinkModification <- function(enrichMethod, enrichPathwayLink, geneList, interestingGeneMap, hostName) { + geneMap <- interestingGeneMap$mapped + hmdbGeneList <- simple_mapping(unlist(strsplit(geneList, ";")), "hsapiens", "rampc", "hmdb", "rampc", hostName, no_dups = TRUE) + hmdbGeneList <- sapply(hmdbGeneList, function(x) x <- gsub("hmdb:", "", x, ignore.case = TRUE)) + geneMap <- filter(geneMap, .data$rampc %in% geneList) + enrichPathwayLink <- paste0( + enrichPathwayLink, + paste0(sapply(hmdbGeneList, function(x) paste0("&xref[]=",x,",HMDB")), collapse = "") + # not many pathway have entrezgene xref. Using both also seem to interfere with coloring + # paste0(sapply(geneMap$entrezgene, function(x) paste0("&xref[]=", x, ",Entrez Gene")), collapse="") + ) + if (enrichMethod == "ORA") { + enrichPathwayLink <- paste0(enrichPathwayLink, "&colors=", colorPos) + } else if (enrichMethod == "GSEA") { + scores <- filter(interestingGeneMap$mapped, .data$entrezgene %in% geneList)[["score"]] + maxScore <- max(scores) + minScore <- min(scores) + tmp <- getPaletteForGsea(maxScore, minScore) + palette <- tmp[[1]] + breaks <- tmp[[2]] + colors <- sapply(scores, function(s) palette[max(which(breaks <= s))]) + colorStr <- paste(gsub("#", "%23", colors, fixed = TRUE), collapse = ",") + enrichPathwayLink <- paste0(enrichPathwayLink, "&colors=", colorStr) + } + return(enrichPathwayLink) +} + wikiLinkModification <- function(enrichMethod, enrichPathwayLink, geneList, interestingGeneMap) { geneMap <- interestingGeneMap$mapped geneList <- unlist(strsplit(geneList, ";")) @@ -66,7 +93,10 @@ wikiLinkModification <- function(enrichMethod, enrichPathwayLink, geneList, inte } -simple_mapping <- function(id_list, organism, source_id, target_id, standard_id, hostName) { +simple_mapping <- function(id_list, organism, source_id, target_id, standard_id, hostName, no_dups = FALSE) { + if (source_id == target_id) { + return(id_list) + } response <- POST(file.path(hostName, "api", "idmapping"), encode = "json", body = list( @@ -78,19 +108,16 @@ simple_mapping <- function(id_list, organism, source_id, target_id, standard_id, stop(webRequestError(response)) } mapRes <- content(response) - # if (mapRes$status == 1) { - # stop(webApiError(mapRes)) - # } mappedIds <- mapRes$mapped - unmappedIds <- unlist(mapRes$unmapped) - # if (length(mappedIds) == 0) { - # stop(idMappingError("empty")) - # } names <- c("sourceId", "targetId") mappedInputGene <- data.frame(matrix(unlist(lapply(replace_null(mappedIds), FUN = function(x) { x[names] })), nrow = length(mappedIds), byrow = TRUE), stringsAsFactors = FALSE) colnames(mappedInputGene) <- c("sourceId", "targetId") + if (no_dups) { + mappedInputGene <- mappedInputGene[!duplicated(mappedInputGene$sourceId), ] + mappedInputGene <- mappedInputGene[!duplicated(mappedInputGene$targetId), ] + } return(mappedInputGene$targetId) } From 5eeee7c1fb4143738dbac7a613fd9b350f6430bd Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 4 May 2023 16:54:48 -0500 Subject: [PATCH 10/82] add CLARA --- NAMESPACE | 3 ++- R/clara_cluster.R | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 R/clara_cluster.R diff --git a/NAMESPACE b/NAMESPACE index 70331e9..45d3daa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(WebGestaltR) export(WebGestaltRBatch) export(WebGestaltR_batch) export(kMedoid) +export(clara_cluster) export(affinityPropagation) export(formatCheck) export(goSlimSummary) @@ -30,7 +31,7 @@ import(methods) import(utils) importFrom(Rcpp,sourceCpp) importFrom(apcluster,apcluster) -importFrom(cluster, pam) +importFrom(cluster, pam, clara) importFrom(doParallel,registerDoParallel) importFrom(doRNG,"%dorng%") importFrom(dplyr,"%>%") diff --git a/R/clara_cluster.R b/R/clara_cluster.R new file mode 100644 index 0000000..bbdf5b7 --- /dev/null +++ b/R/clara_cluster.R @@ -0,0 +1,17 @@ +clara_cluster <- function(idsInSet, score){ + all.genes <- sort(unique(unlist(idsInSet))) + overlap.mat <- sapply(idsInSet, function(x) {as.integer(all.genes %in% x)}) + kmRes <- clara(overlap.mat, 5, metric="manhattan", stand=FALSE, samples=1000, pamLike=TRUE) + + #sort clusters to make exemplar the first member + clusters <- vector(mode="list", length(kmRes$medoids)) + print(kmRes$clusinfo) + if(length(kmRes$medoids) == 0){ + return(NULL) + } + for (i in 1:length(clusters)) { + clusters[[i]] <- kmRes$clustering[[i]][order(kmRes$clustering[[i]] == i, decreasing=TRUE)] + } + # print(kmRes$medoids) + return(list(clusters=sapply(clusters, names), representatives=kmRes$medoids)) +} From 52566039e453f653fed8d8fc2615ff262dabe8dd Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 16 May 2023 10:17:41 -0500 Subject: [PATCH 11/82] add ability to toggle redundancy methods --- R/WebGestaltR.R | 6 ++--- R/WebGestaltRGsea.R | 22 ++++++++++++++--- R/WebGestaltROra.R | 26 +++++++++++++++++---- R/enrichResultSection.R | 3 ++- inst/templates/enrichResultSection.mustache | 9 ++++++- 5 files changed, 53 insertions(+), 13 deletions(-) diff --git a/R/WebGestaltR.R b/R/WebGestaltR.R index ce8e003..5d08fc6 100644 --- a/R/WebGestaltR.R +++ b/R/WebGestaltR.R @@ -193,7 +193,7 @@ #' networkConstructionMethod="Network_Retrieval_Prioritization") #' } #' -WebGestaltR <- function(enrichMethod="ORA", organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, interestGeneFile=NULL, interestGene=NULL, interestGeneType=NULL, collapseMethod="mean", referenceGeneFile=NULL, referenceGene=NULL, referenceGeneType=NULL, referenceSet=NULL, minNum=10, maxNum=500, sigMethod="fdr", fdrMethod="BH", fdrThr=0.05, topThr=10, reportNum=20, perNum=1000, gseaP=1, isOutput=TRUE, outputDirectory=getwd(), projectName=NULL, dagColor="continuous", saveRawGseaResult=FALSE, gseaPlotFormat=c("png", "svg"), setCoverNum=10, networkConstructionMethod=NULL, neighborNum=10, highlightType="Seeds", highlightSeedNum=10, nThreads=1, cache=NULL, hostName="https://www.webgestalt.org/", ...) { +WebGestaltR <- function(enrichMethod="ORA", organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, interestGeneFile=NULL, interestGene=NULL, interestGeneType=NULL, collapseMethod="mean", referenceGeneFile=NULL, referenceGene=NULL, referenceGeneType=NULL, referenceSet=NULL, minNum=10, maxNum=500, sigMethod="fdr", fdrMethod="BH", fdrThr=0.05, topThr=10, reportNum=20, perNum=1000, gseaP=1, isOutput=TRUE, outputDirectory=getwd(), projectName=NULL, dagColor="continuous", saveRawGseaResult=FALSE, gseaPlotFormat=c("png", "svg"), setCoverNum=10, networkConstructionMethod=NULL, neighborNum=10, highlightType="Seeds", highlightSeedNum=10, nThreads=1, cache=NULL, hostName="https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, ...) { extraArgs <- list(...) if ('keepGSEAFolder' %in% names(extraArgs) | 'keepGseaFolder' %in% names(extraArgs)) { warning("Parameter keepGSEAFolder is obsolete.\n") @@ -229,9 +229,9 @@ WebGestaltR <- function(enrichMethod="ORA", organism="hsapiens", enrichDatabase= } projectName <- sanitizeFileName(projectName) # use for GOSlim summary file name, convert punct to _ if (enrichMethod == "ORA") { - enrichR <- WebGestaltROra(organism=organism, enrichDatabase=enrichDatabase, enrichDatabaseFile=enrichDatabaseFile, enrichDatabaseType=enrichDatabaseType, enrichDatabaseDescriptionFile=enrichDatabaseDescriptionFile, interestGeneFile=interestGeneFile, interestGene=interestGene, interestGeneType=interestGeneType, collapseMethod=collapseMethod, referenceGeneFile=referenceGeneFile, referenceGene=referenceGene, referenceGeneType=referenceGeneType, referenceSet=referenceSet, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, setCoverNum=setCoverNum, isOutput=isOutput, outputDirectory=outputDirectory, projectName=projectName, dagColor=dagColor, nThreads=nThreads, cache=cache, hostName=hostName) + enrichR <- WebGestaltROra(organism=organism, enrichDatabase=enrichDatabase, enrichDatabaseFile=enrichDatabaseFile, enrichDatabaseType=enrichDatabaseType, enrichDatabaseDescriptionFile=enrichDatabaseDescriptionFile, interestGeneFile=interestGeneFile, interestGene=interestGene, interestGeneType=interestGeneType, collapseMethod=collapseMethod, referenceGeneFile=referenceGeneFile, referenceGene=referenceGene, referenceGeneType=referenceGeneType, referenceSet=referenceSet, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, setCoverNum=setCoverNum, isOutput=isOutput, outputDirectory=outputDirectory, projectName=projectName, dagColor=dagColor, nThreads=nThreads, cache=cache, hostName=hostName, useWeightedSetCover = useWeightedSetCover, useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid) } else if (enrichMethod == "GSEA") { - enrichR <- WebGestaltRGsea(organism=organism, enrichDatabase=enrichDatabase, enrichDatabaseFile=enrichDatabaseFile, enrichDatabaseType=enrichDatabaseType, enrichDatabaseDescriptionFile=enrichDatabaseDescriptionFile, interestGeneFile=interestGeneFile, interestGene=interestGene, interestGeneType=interestGeneType, collapseMethod=collapseMethod, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, setCoverNum=setCoverNum, perNum=perNum, p=gseaP, isOutput=isOutput, outputDirectory=outputDirectory, projectName=projectName, dagColor=dagColor, saveRawGseaResult=saveRawGseaResult, plotFormat=gseaPlotFormat, nThreads=nThreads, cache=cache, hostName=hostName) + enrichR <- WebGestaltRGsea(organism=organism, enrichDatabase=enrichDatabase, enrichDatabaseFile=enrichDatabaseFile, enrichDatabaseType=enrichDatabaseType, enrichDatabaseDescriptionFile=enrichDatabaseDescriptionFile, interestGeneFile=interestGeneFile, interestGene=interestGene, interestGeneType=interestGeneType, collapseMethod=collapseMethod, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, setCoverNum=setCoverNum, perNum=perNum, p=gseaP, isOutput=isOutput, outputDirectory=outputDirectory, projectName=projectName, dagColor=dagColor, saveRawGseaResult=saveRawGseaResult, plotFormat=gseaPlotFormat, nThreads=nThreads, cache=cache, hostName=hostName, useWeightedSetCover = useWeightedSetCover, useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid) } else if (enrichMethod == "NTA") { enrichR <- WebGestaltRNta(organism=organism, network=enrichDatabase, method=networkConstructionMethod, neighborNum=neighborNum, highlightSeedNum=highlightSeedNum, inputSeed=interestGene, inputSeedFile=interestGeneFile, interestGeneType=interestGeneType, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, outputDirectory=outputDirectory, projectName=projectName, highlightType=highlightType, cache=cache, hostName=hostName) } diff --git a/R/WebGestaltRGsea.R b/R/WebGestaltRGsea.R index 9112d97..9d2ecd5 100644 --- a/R/WebGestaltRGsea.R +++ b/R/WebGestaltRGsea.R @@ -1,6 +1,6 @@ #' @importFrom dplyr select distinct left_join arrange %>% mutate #' @importFrom readr write_tsv -WebGestaltRGsea <- function(organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, interestGeneFile=NULL, interestGene=NULL, interestGeneType=NULL, collapseMethod="mean", minNum=10, maxNum=500, fdrMethod="BH", sigMethod="fdr", fdrThr=0.05, topThr=10, reportNum=20, setCoverNum=10, perNum=1000, p=1, isOutput=TRUE, outputDirectory=getwd(), projectName=NULL, dagColor="binary", saveRawGseaResult=FALSE, plotFormat="png", nThreads=1, cache=NULL, hostName="https://www.webgestalt.org/") { +WebGestaltRGsea <- function(organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, interestGeneFile=NULL, interestGene=NULL, interestGeneType=NULL, collapseMethod="mean", minNum=10, maxNum=500, fdrMethod="BH", sigMethod="fdr", fdrThr=0.05, topThr=10, reportNum=20, setCoverNum=10, perNum=1000, p=1, isOutput=TRUE, outputDirectory=getwd(), projectName=NULL, dagColor="binary", saveRawGseaResult=FALSE, plotFormat="png", nThreads=1, cache=NULL, hostName="https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE) { enrichMethod <- "GSEA" projectDir <- file.path(outputDirectory, paste0("Project_", projectName)) @@ -116,14 +116,30 @@ WebGestaltRGsea <- function(organism="hsapiens", enrichDatabase=NULL, enrichData pValue <- enrichedSig$pValue pValue[pValue == 0] <- .Machine$double.eps signedLogP <- -log(pValue) * sign(enrichedSig$enrichmentScore) - apRes <- affinityPropagation(idsInSet, signedLogP) - wscRes <- weightedSetCover(idsInSet, 1 / signedLogP, setCoverNum, nThreads) + apRes <- NULL + wscRes <- NULL + kRes <- NULL + if(useAffinityPropagation){ + apRes <- affinityPropagation(idsInSet, signedLogP) + } + if(useWeightedSetCover){ + wscRes <- weightedSetCover(idsInSet, 1 / signedLogP, setCoverNum, nThreads) + } + if(usekMedoid){ + kRes <- kMedoid(idsInSet, signedLogP) + } if (!is.null(apRes)) { writeLines(sapply(apRes$clusters, paste, collapse="\t"), file.path(projectDir, paste0("enriched_geneset_ap_clusters_", projectName, ".txt"))) } else { apRes <- NULL } clusters$ap <- apRes + if (!is.null(kRes)) { + writeLines(sapply(kRes$clusters, paste, collapse="\t"), file.path(projectDir, paste0("enriched_geneset_kmedoid_clusters_", projectName, ".txt"))) + } else { + kRes <- NULL + } + clusters$km <- kRes if (!is.null(wscRes$topSets)) { writeLines(c(paste0("# Coverage: ", wscRes$coverage), wscRes$topSets), file.path(projectDir, paste0("enriched_geneset_wsc_topsets_", projectName, ".txt"))) clusters$wsc <- list(representatives=wscRes$topSets, coverage=wscRes$coverage) diff --git a/R/WebGestaltROra.R b/R/WebGestaltROra.R index ca97715..3b8ce1e 100644 --- a/R/WebGestaltROra.R +++ b/R/WebGestaltROra.R @@ -1,6 +1,6 @@ #' @importFrom readr write_tsv #' @importFrom dplyr left_join select arrange %>% desc mutate -WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, collapseMethod = "mean", referenceGeneFile = NULL, referenceGene = NULL, referenceGeneType = NULL, referenceSet = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10, reportNum = 20, setCoverNum = 10, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "binary", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/") { +WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, collapseMethod = "mean", referenceGeneFile = NULL, referenceGene = NULL, referenceGeneType = NULL, referenceSet = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10, reportNum = 20, setCoverNum = 10, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "binary", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE) { enrichMethod <- "ORA" projectDir <- file.path(outputDirectory, paste0("Project_", projectName)) @@ -119,14 +119,30 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD names(idsInSet) <- enrichedSig$geneSet minusLogP <- -log(enrichedSig$pValue) minusLogP[minusLogP == Inf] <- -log(.Machine$double.eps) - apRes <- kMedoid(idsInSet, minusLogP) - wscRes <- weightedSetCover(idsInSet, 1 / minusLogP, setCoverNum, nThreads) + apRes <- NULL + wscRes <- NULL + kRes <- NULL + if(useAffinityPropagation){ + apRes <- affinityPropagation(idsInSet, minusLogP) + } + if(useWeightedSetCover){ + wscRes <- weightedSetCover(idsInSet, 1 / minusLogP, setCoverNum, nThreads) + } + if(usekMedoid){ + kRes <- kMedoid(idsInSet, minusLogP) + } if (!is.null(apRes)) { - writeLines(sapply(apRes$clusters, paste, collapse = "\t"), file.path(projectDir, paste0("enriched_geneset_ap_clusters_", projectName, ".txt"))) + writeLines(sapply(apRes$clusters, paste, collapse="\t"), file.path(projectDir, paste0("enriched_geneset_ap_clusters_", projectName, ".txt"))) } else { - apRes <- NULL + apRes <- NULL } clusters$ap <- apRes + if (!is.null(kRes)) { + writeLines(sapply(kRes$clusters, paste, collapse="\t"), file.path(projectDir, paste0("enriched_geneset_kmedoid_clusters_", projectName, ".txt"))) + } else { + kRes <- NULL + } + clusters$km <- kRes if (!is.null(wscRes$topSets)) { writeLines(c(paste0("# Coverage: ", wscRes$coverage), wscRes$topSets), file.path(projectDir, paste0("enriched_geneset_wsc_topsets_", projectName, ".txt"))) clusters$wsc <- list(representatives = wscRes$topSets, coverage = wscRes$coverage) diff --git a/R/enrichResultSection.R b/R/enrichResultSection.R index 52075c8..9c24b6d 100644 --- a/R/enrichResultSection.R +++ b/R/enrichResultSection.R @@ -30,7 +30,8 @@ enrichResultSection <- function(enrichMethod, enrichedSig, geneSet, geneSetDes, hasMultipleDatabases = hasMultipleDatabases, setsWithNetJson = setsWithNetJson, hasAp = !is.null(clusters$ap), - hasWsc = !is.null(clusters$wsc) + hasWsc = !is.null(clusters$wsc), + hasKmed = !is.null(clusters$km) ) template <- readLines(system.file("templates/enrichResultSection.mustache", package = "WebGestaltR")) return(whisker.render(template, data)) diff --git a/inst/templates/enrichResultSection.mustache b/inst/templates/enrichResultSection.mustache index d2a1162..8a2df95 100644 --- a/inst/templates/enrichResultSection.mustache +++ b/inst/templates/enrichResultSection.mustache @@ -8,10 +8,17 @@ {{#hasAp}} - k-Medoid + k-Medoid {{/hasAp}} + {{#hasKmed}} + + + k-Medoid + + + {{/hasKmed}} {{#hasWsc}} From 6f519c28e0ca983b6c60e9c0a03679bef14277a3 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 18 May 2023 13:49:57 -0500 Subject: [PATCH 12/82] fix GSEA for metabolites --- .lintr | 5 +++-- R/idMappingMetabolites.R | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/.lintr b/.lintr index 05abbd4..a13b6be 100644 --- a/.lintr +++ b/.lintr @@ -1,6 +1,7 @@ linters: linters_with_defaults( - line_length_linter(120), + line_length_linter = NULL, commented_code_linter = NULL, - no_tab_linter = NULL + no_tab_linter = NULL, + object_name_linter = NULL ) diff --git a/R/idMappingMetabolites.R b/R/idMappingMetabolites.R index b3774f7..47dcbd0 100644 --- a/R/idMappingMetabolites.R +++ b/R/idMappingMetabolites.R @@ -10,11 +10,12 @@ idMappingMetabolites <- function(organism = "hsapiens", dataType = "list", input } if (dataType == "rnk") { + ###### Collapse the gene ids with multiple scores########## x <- tapply(inputGene$score, inputGene$gene, collapseMethod) inputGene <- data.frame(gene = names(x), score = as.numeric(x), stringsAsFactors = FALSE) inputGeneL <- inputGene$gene - colnames(inputGene) <- c(sourceIdType, "score") + colnames(inputGene) <- c("userId", "score") } if (dataType == "gmt") { @@ -126,8 +127,21 @@ idMappingMetabolites <- function(organism = "hsapiens", dataType = "list", input } else { mappedInputGene$gLink <- paste0("URL NOT FOUND FOR TYPE ", sourceIdType) } - inputGene <- mappedInputGene + inputGene$userId <- add_prefix(inputGene$userId, old_id_type) + if(dataType=="list"){ + inputGene <- select(mappedInputGene, .data$userId, .data$geneSymbol, .data$geneName, targetIdType, .data$gLink) + } + + if(dataType=="rnk"){ + inputGene <- mappedInputGene %>% left_join(inputGene, by=c("userId"="userId")) %>% + select(.data$userId, .data$geneSymbol, .data$geneName, targetIdType, .data$score, .data$gLink) + } + if(dataType=="gmt"){ + inputGene <- mappedInputGene %>% left_join(inputGene, by=c("userId"="userId")) %>% + select(.data$geneSet, .data$link, .data$userId, .data$geneSymbol, .data$geneName, targetIdType, .data$gLink) + } + # inputGene <- mappedInputGene ############# Output####################### if (mappingOutput) { idMappingOutput(outputFileName, inputGene, unmappedIds, dataType, old_id_type, targetIdType = targetIdType) @@ -136,6 +150,25 @@ idMappingMetabolites <- function(organism = "hsapiens", dataType = "list", input return(r) } +add_prefix <- function(x, sourceIdType) { + uppers <- c("LIPIDMAPS", "CAS") + if(toupper(sourceIdType) %in% uppers){ + return(unlist(sapply(x, function(y) { + if (grepl(":", y)) { + return(y) + } + return(paste0(toupper(sourceIdType), ":", toupper(y))) + }))) + } else { + return(unlist(sapply(x, function(y) { + if (grepl(":", y) && sourceIdType != "swisslipids") { + return(y) + } + return(paste0(sourceIdType, ":", y)) + }))) + } +} + replace_prefix <- function(x, prefix) { return(sapply(x, function(y) { return(sub(prefix, "", y, ignore.case = TRUE)) From 51a96acd4dc6371a977ea6643c40c12268f17a7c Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 18 May 2023 15:52:20 -0500 Subject: [PATCH 13/82] fix ORA --- R/idMappingMetabolites.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/idMappingMetabolites.R b/R/idMappingMetabolites.R index 47dcbd0..899f2b6 100644 --- a/R/idMappingMetabolites.R +++ b/R/idMappingMetabolites.R @@ -127,17 +127,19 @@ idMappingMetabolites <- function(organism = "hsapiens", dataType = "list", input } else { mappedInputGene$gLink <- paste0("URL NOT FOUND FOR TYPE ", sourceIdType) } - inputGene$userId <- add_prefix(inputGene$userId, old_id_type) + if(dataType=="list"){ inputGene <- select(mappedInputGene, .data$userId, .data$geneSymbol, .data$geneName, targetIdType, .data$gLink) } if(dataType=="rnk"){ + inputGene$userId <- add_prefix(inputGene$userId, old_id_type) inputGene <- mappedInputGene %>% left_join(inputGene, by=c("userId"="userId")) %>% select(.data$userId, .data$geneSymbol, .data$geneName, targetIdType, .data$score, .data$gLink) } if(dataType=="gmt"){ + inputGene$userId <- add_prefix(inputGene$userId, old_id_type) inputGene <- mappedInputGene %>% left_join(inputGene, by=c("userId"="userId")) %>% select(.data$geneSet, .data$link, .data$userId, .data$geneSymbol, .data$geneName, targetIdType, .data$gLink) } From 2d3346261d7dd118188b6d24abcb2db0424da201 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Wed, 5 Jul 2023 15:02:29 -0500 Subject: [PATCH 14/82] add maxK for kMedoid and fix affinity prop labeling --- .gitignore | 1 + R/WebGestaltR.R | 23 ++++++++++++++------- R/WebGestaltRGsea.R | 4 ++-- R/WebGestaltROra.R | 4 ++-- R/gseaEnrichment.R | 1 - R/kMedoid.R | 7 +++++-- R/reportUtils.R | 12 +++++++++++ inst/templates/enrichResultSection.mustache | 2 +- 8 files changed, 38 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index 085035c..c071546 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,4 @@ vignettes/*.pdf # R Environment Variables .Renviron .vscode +.DS_Store diff --git a/R/WebGestaltR.R b/R/WebGestaltR.R index 5d08fc6..ca2bbc7 100644 --- a/R/WebGestaltR.R +++ b/R/WebGestaltR.R @@ -12,7 +12,8 @@ #' categories have DAG (directed acyclic graph) structure or genes in the functional #' categories have network structure, those relationship can also be visualized in the #' report. -#' +#' +#' @param omic_type The type of omics analysis: \code{single} or \code{multi} #' @param enrichMethod Enrichment methods: \code{ORA}, \code{GSEA} or \code{NTA}. #' @param organism Currently, WebGestaltR supports 12 organisms. Users can use the function #' \code{listOrganism} to check available organisms. Users can also input \code{others} to @@ -50,6 +51,7 @@ #' @param interestGeneType The ID type of the interesting gene list. The supported ID types of #' WebGestaltR for the selected organism can be found by the function \code{listIdType}. If #' the \code{organism} is \code{others}, users do not need to set this parameter. +#' @param interestGeneNames The names of the id lists for multiomics data. #' @param collapseMethod The method to collapse duplicate IDs with scores. \code{mean}, #' \code{median}, \code{min} and \code{max} represent the mean, median, minimum and maximum #' of scores for the duplicate IDs. @@ -193,7 +195,7 @@ #' networkConstructionMethod="Network_Retrieval_Prioritization") #' } #' -WebGestaltR <- function(enrichMethod="ORA", organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, interestGeneFile=NULL, interestGene=NULL, interestGeneType=NULL, collapseMethod="mean", referenceGeneFile=NULL, referenceGene=NULL, referenceGeneType=NULL, referenceSet=NULL, minNum=10, maxNum=500, sigMethod="fdr", fdrMethod="BH", fdrThr=0.05, topThr=10, reportNum=20, perNum=1000, gseaP=1, isOutput=TRUE, outputDirectory=getwd(), projectName=NULL, dagColor="continuous", saveRawGseaResult=FALSE, gseaPlotFormat=c("png", "svg"), setCoverNum=10, networkConstructionMethod=NULL, neighborNum=10, highlightType="Seeds", highlightSeedNum=10, nThreads=1, cache=NULL, hostName="https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, ...) { +WebGestaltR <- function(omic_type = "single", enrichMethod="ORA", organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, interestGeneFile=NULL, interestGene=NULL, interestGeneType=NULL, interestGeneNames=NULL, collapseMethod="mean", referenceGeneFile=NULL, referenceGene=NULL, referenceGeneType=NULL, referenceSet=NULL, minNum=10, maxNum=500, sigMethod="fdr", fdrMethod="BH", fdrThr=0.05, topThr=10, reportNum=20, perNum=1000, gseaP=1, isOutput=TRUE, outputDirectory=getwd(), projectName=NULL, dagColor="continuous", saveRawGseaResult=FALSE, gseaPlotFormat=c("png", "svg"), setCoverNum=10, networkConstructionMethod=NULL, neighborNum=10, highlightType="Seeds", highlightSeedNum=10, nThreads=1, cache=NULL, hostName="https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10, ...) { extraArgs <- list(...) if ('keepGSEAFolder' %in% names(extraArgs) | 'keepGseaFolder' %in% names(extraArgs)) { warning("Parameter keepGSEAFolder is obsolete.\n") @@ -228,12 +230,17 @@ WebGestaltR <- function(enrichMethod="ORA", organism="hsapiens", enrichDatabase= projectName <- as.character(as.integer(Sys.time())) } projectName <- sanitizeFileName(projectName) # use for GOSlim summary file name, convert punct to _ - if (enrichMethod == "ORA") { - enrichR <- WebGestaltROra(organism=organism, enrichDatabase=enrichDatabase, enrichDatabaseFile=enrichDatabaseFile, enrichDatabaseType=enrichDatabaseType, enrichDatabaseDescriptionFile=enrichDatabaseDescriptionFile, interestGeneFile=interestGeneFile, interestGene=interestGene, interestGeneType=interestGeneType, collapseMethod=collapseMethod, referenceGeneFile=referenceGeneFile, referenceGene=referenceGene, referenceGeneType=referenceGeneType, referenceSet=referenceSet, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, setCoverNum=setCoverNum, isOutput=isOutput, outputDirectory=outputDirectory, projectName=projectName, dagColor=dagColor, nThreads=nThreads, cache=cache, hostName=hostName, useWeightedSetCover = useWeightedSetCover, useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid) - } else if (enrichMethod == "GSEA") { - enrichR <- WebGestaltRGsea(organism=organism, enrichDatabase=enrichDatabase, enrichDatabaseFile=enrichDatabaseFile, enrichDatabaseType=enrichDatabaseType, enrichDatabaseDescriptionFile=enrichDatabaseDescriptionFile, interestGeneFile=interestGeneFile, interestGene=interestGene, interestGeneType=interestGeneType, collapseMethod=collapseMethod, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, setCoverNum=setCoverNum, perNum=perNum, p=gseaP, isOutput=isOutput, outputDirectory=outputDirectory, projectName=projectName, dagColor=dagColor, saveRawGseaResult=saveRawGseaResult, plotFormat=gseaPlotFormat, nThreads=nThreads, cache=cache, hostName=hostName, useWeightedSetCover = useWeightedSetCover, useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid) - } else if (enrichMethod == "NTA") { - enrichR <- WebGestaltRNta(organism=organism, network=enrichDatabase, method=networkConstructionMethod, neighborNum=neighborNum, highlightSeedNum=highlightSeedNum, inputSeed=interestGene, inputSeedFile=interestGeneFile, interestGeneType=interestGeneType, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, outputDirectory=outputDirectory, projectName=projectName, highlightType=highlightType, cache=cache, hostName=hostName) + if (omic_type == "single"){ + if (enrichMethod == "ORA") { + enrichR <- WebGestaltROra(organism=organism, enrichDatabase=enrichDatabase, enrichDatabaseFile=enrichDatabaseFile, enrichDatabaseType=enrichDatabaseType, enrichDatabaseDescriptionFile=enrichDatabaseDescriptionFile, interestGeneFile=interestGeneFile, interestGene=interestGene, interestGeneType=interestGeneType, collapseMethod=collapseMethod, referenceGeneFile=referenceGeneFile, referenceGene=referenceGene, referenceGeneType=referenceGeneType, referenceSet=referenceSet, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, setCoverNum=setCoverNum, isOutput=isOutput, outputDirectory=outputDirectory, projectName=projectName, dagColor=dagColor, nThreads=nThreads, cache=cache, hostName=hostName, useWeightedSetCover = useWeightedSetCover, useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid, kMedoid_k = kMedoid_k) + } else if (enrichMethod == "GSEA") { + enrichR <- WebGestaltRGsea(organism=organism, enrichDatabase=enrichDatabase, enrichDatabaseFile=enrichDatabaseFile, enrichDatabaseType=enrichDatabaseType, enrichDatabaseDescriptionFile=enrichDatabaseDescriptionFile, interestGeneFile=interestGeneFile, interestGene=interestGene, interestGeneType=interestGeneType, collapseMethod=collapseMethod, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, setCoverNum=setCoverNum, perNum=perNum, p=gseaP, isOutput=isOutput, outputDirectory=outputDirectory, projectName=projectName, dagColor=dagColor, saveRawGseaResult=saveRawGseaResult, plotFormat=gseaPlotFormat, nThreads=nThreads, cache=cache, hostName=hostName, useWeightedSetCover = useWeightedSetCover, useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid, kMedoid_k = kMedoid_k) + } else if (enrichMethod == "NTA") { + enrichR <- WebGestaltRNta(organism=organism, network=enrichDatabase, method=networkConstructionMethod, neighborNum=neighborNum, highlightSeedNum=highlightSeedNum, inputSeed=interestGene, inputSeedFile=interestGeneFile, interestGeneType=interestGeneType, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, outputDirectory=outputDirectory, projectName=projectName, highlightType=highlightType, cache=cache, hostName=hostName) + } + } + else if (omic_type == "multi"){ + enrichR <- WebGestaltRMultiOmics(enrichMethod = enrichMethod, organism=organism, enrichDatabase=enrichDatabase, enrichDatabaseFile=enrichDatabaseFile, enrichDatabaseType=enrichDatabaseType, enrichDatabaseDescriptionFile=enrichDatabaseDescriptionFile, interestGeneFiles=interestGeneFile, interestGenes =interestGene, interestGeneType=interestGeneType, interestGeneNames = interestGeneNames, collapseMethod=collapseMethod, referenceGeneFile=referenceGeneFile, referenceGene=referenceGene, referenceGeneType=referenceGeneType, referenceSet=referenceSet, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, setCoverNum=setCoverNum, isOutput=isOutput, outputDirectory=outputDirectory, projectName=projectName, dagColor=dagColor, nThreads=nThreads, cache=cache, hostName=hostName, useWeightedSetCover = useWeightedSetCover, useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid, kMedoid_k = kMedoid_k) } return(enrichR) diff --git a/R/WebGestaltRGsea.R b/R/WebGestaltRGsea.R index 9d2ecd5..bbdea58 100644 --- a/R/WebGestaltRGsea.R +++ b/R/WebGestaltRGsea.R @@ -1,6 +1,6 @@ #' @importFrom dplyr select distinct left_join arrange %>% mutate #' @importFrom readr write_tsv -WebGestaltRGsea <- function(organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, interestGeneFile=NULL, interestGene=NULL, interestGeneType=NULL, collapseMethod="mean", minNum=10, maxNum=500, fdrMethod="BH", sigMethod="fdr", fdrThr=0.05, topThr=10, reportNum=20, setCoverNum=10, perNum=1000, p=1, isOutput=TRUE, outputDirectory=getwd(), projectName=NULL, dagColor="binary", saveRawGseaResult=FALSE, plotFormat="png", nThreads=1, cache=NULL, hostName="https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE) { +WebGestaltRGsea <- function(organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, interestGeneFile=NULL, interestGene=NULL, interestGeneType=NULL, collapseMethod="mean", minNum=10, maxNum=500, fdrMethod="BH", sigMethod="fdr", fdrThr=0.05, topThr=10, reportNum=20, setCoverNum=10, perNum=1000, p=1, isOutput=TRUE, outputDirectory=getwd(), projectName=NULL, dagColor="binary", saveRawGseaResult=FALSE, plotFormat="png", nThreads=1, cache=NULL, hostName="https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10) { enrichMethod <- "GSEA" projectDir <- file.path(outputDirectory, paste0("Project_", projectName)) @@ -126,7 +126,7 @@ WebGestaltRGsea <- function(organism="hsapiens", enrichDatabase=NULL, enrichData wscRes <- weightedSetCover(idsInSet, 1 / signedLogP, setCoverNum, nThreads) } if(usekMedoid){ - kRes <- kMedoid(idsInSet, signedLogP) + kRes <- kMedoid(idsInSet, signedLogP, maxK = kMedoid_k) } if (!is.null(apRes)) { writeLines(sapply(apRes$clusters, paste, collapse="\t"), file.path(projectDir, paste0("enriched_geneset_ap_clusters_", projectName, ".txt"))) diff --git a/R/WebGestaltROra.R b/R/WebGestaltROra.R index 3b8ce1e..2d44641 100644 --- a/R/WebGestaltROra.R +++ b/R/WebGestaltROra.R @@ -1,6 +1,6 @@ #' @importFrom readr write_tsv #' @importFrom dplyr left_join select arrange %>% desc mutate -WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, collapseMethod = "mean", referenceGeneFile = NULL, referenceGene = NULL, referenceGeneType = NULL, referenceSet = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10, reportNum = 20, setCoverNum = 10, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "binary", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE) { +WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, collapseMethod = "mean", referenceGeneFile = NULL, referenceGene = NULL, referenceGeneType = NULL, referenceSet = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10, reportNum = 20, setCoverNum = 10, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "binary", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10) { enrichMethod <- "ORA" projectDir <- file.path(outputDirectory, paste0("Project_", projectName)) @@ -129,7 +129,7 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD wscRes <- weightedSetCover(idsInSet, 1 / minusLogP, setCoverNum, nThreads) } if(usekMedoid){ - kRes <- kMedoid(idsInSet, minusLogP) + kRes <- kMedoid(idsInSet, minusLogP, maxK = kMedoid_k) } if (!is.null(apRes)) { writeLines(sapply(apRes$clusters, paste, collapse="\t"), file.path(projectDir, paste0("enriched_geneset_ap_clusters_", projectName, ".txt"))) diff --git a/R/gseaEnrichment.R b/R/gseaEnrichment.R index 90a24f0..a21f02d 100644 --- a/R/gseaEnrichment.R +++ b/R/gseaEnrichment.R @@ -5,7 +5,6 @@ gseaEnrichment <- function (hostName, outputDirectory, projectName, geneRankList if (!dir.exists(projectFolder)) { dir.create(projectFolder) } - colnames(geneRankList) <- c("gene", "score") sortedScores <- sort(geneRankList$score, decreasing=TRUE) diff --git a/R/kMedoid.R b/R/kMedoid.R index e05ca22..9e9cad4 100644 --- a/R/kMedoid.R +++ b/R/kMedoid.R @@ -1,9 +1,12 @@ -kMedoid <- function(idsInSet, score){ +kMedoid <- function(idsInSet, score, maxK = 10){ # first find out the union of sets, sorted all.genes <- sort(unique(unlist(idsInSet))) overlap.mat <- sapply(idsInSet, function(x) {as.integer(all.genes %in% x)}) num <- length(idsInSet) + if (num <= maxK) { + maxK <- num - 1 + } sim.mat <- matrix(1, num, num) colnames(sim.mat) <- colnames(overlap.mat) @@ -38,7 +41,7 @@ kMedoid <- function(idsInSet, score){ } # compute the k-medoid clustering - kmRes <- pam(sim.mat, 5, diss=TRUE, variant = "faster") # TODO: Make parameter for number of clusters. Currently set to 5. + kmRes <- pam(sim.mat, maxK, diss=TRUE, variant = "faster") # TODO: Make parameter for number of clusters. Currently set to 5. #sort clusters to make exemplar the first member clusters <- vector(mode="list", length(kmRes$medoids)) diff --git a/R/reportUtils.R b/R/reportUtils.R index 72875df..4f84f54 100644 --- a/R/reportUtils.R +++ b/R/reportUtils.R @@ -1,10 +1,22 @@ mapUserId <- function(enrichedSig,geneColumn,interestingGeneMap){ ####map entrez gene back to the original user id and add one more column to the enrichedSig standardId <- interestingGeneMap$standardId + mapgene <- interestingGeneMap$mapped[, c("userId", standardId)] gene <- enrichedSig[[geneColumn]] gene <- strsplit(gene,";") gene <- unlist(lapply(gene,geneM,mapgene)) + # print(gene) + # if (standardId == "rampc"){ + # gene <- unlist(lapply(gene, function(x) { + # gene_array <- strsplit(x,";") + # if (is.na(x)) { + # return(NA); + # } else { + # return(paste(strsplit(x, ":")[-1], sep=":")) + # } + # })) + # } enrichedSig <- data.frame(enrichedSig, userId=gene, stringsAsFactors=FALSE) return(enrichedSig) } diff --git a/inst/templates/enrichResultSection.mustache b/inst/templates/enrichResultSection.mustache index 8a2df95..373a942 100644 --- a/inst/templates/enrichResultSection.mustache +++ b/inst/templates/enrichResultSection.mustache @@ -8,7 +8,7 @@ {{#hasAp}} - k-Medoid + Affinity Propagation {{/hasAp}} From cf60e74ce6e0a8849a469b5766ffe50be2ad46d7 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 25 Sep 2023 10:24:11 -0500 Subject: [PATCH 15/82] add start of adding Rust to package --- .Rbuildignore | 1 + DESCRIPTION | 1 + R/extendr-wrappers.R | 12 + src/.gitignore | 2 + src/Makevars | 30 +++ src/Makevars.ucrt | 5 + src/Makevars.win | 40 +++ src/RcppExports.cpp | 3 + src/WebGestaltR-win.def | 2 + src/entrypoint.c | 8 + src/rust/Cargo.lock | 526 ++++++++++++++++++++++++++++++++++++++++ src/rust/Cargo.toml | 12 + src/rust/src/lib.rs | 24 ++ 13 files changed, 666 insertions(+) create mode 100644 .Rbuildignore create mode 100644 R/extendr-wrappers.R create mode 100644 src/Makevars create mode 100644 src/Makevars.ucrt create mode 100644 src/Makevars.win create mode 100644 src/WebGestaltR-win.def create mode 100644 src/entrypoint.c create mode 100644 src/rust/Cargo.lock create mode 100644 src/rust/Cargo.toml create mode 100644 src/rust/src/lib.rs diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..a03a6ba --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1 @@ +^src/\.cargo$ diff --git a/DESCRIPTION b/DESCRIPTION index 098a97b..17c9e16 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,3 +22,4 @@ Imports: methods, dplyr, doRNG, readr, parallel (>= 3.3.2), NeedsCompilation: yes LinkingTo: Rcpp RoxygenNote: 7.2.3 +Config/rextendr/version: 0.3.1 diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R new file mode 100644 index 0000000..82aa07a --- /dev/null +++ b/R/extendr-wrappers.R @@ -0,0 +1,12 @@ +# nolint start + +#' @docType package +#' @usage NULL +#' @useDynLib WebGestaltR, .registration = TRUE +NULL + +#' Return string `"Hello world!"` to R. +#' @export +hello_world <- function() .Call(wrap__hello_world) + +# nolint end diff --git a/src/.gitignore b/src/.gitignore index 22034c4..c23c7b3 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -1,3 +1,5 @@ *.o *.so *.dll +target +.cargo diff --git a/src/Makevars b/src/Makevars new file mode 100644 index 0000000..cdfb2a5 --- /dev/null +++ b/src/Makevars @@ -0,0 +1,30 @@ +TARGET_DIR = ./rust/target +LIBDIR = $(TARGET_DIR)/release +STATLIB = $(LIBDIR)/libWebGestaltR.a +PKG_LIBS = -L$(LIBDIR) -lWebGestaltR + +all: C_clean + +$(SHLIB): $(STATLIB) + +CARGOTMP = $(CURDIR)/.cargo + +$(STATLIB): + # In some environments, ~/.cargo/bin might not be included in PATH, so we need + # to set it here to ensure cargo can be invoked. It is appended to PATH and + # therefore is only used if cargo is absent from the user's PATH. + if [ "$(NOT_CRAN)" != "true" ]; then \ + export CARGO_HOME=$(CARGOTMP); \ + fi && \ + export PATH="$(PATH):$(HOME)/.cargo/bin" && \ + cargo build --lib --release --manifest-path=./rust/Cargo.toml --target-dir $(TARGET_DIR) + if [ "$(NOT_CRAN)" != "true" ]; then \ + rm -Rf $(CARGOTMP) && \ + rm -Rf $(LIBDIR)/build; \ + fi + +C_clean: + rm -Rf $(SHLIB) $(STATLIB) $(OBJECTS) + +clean: + rm -Rf $(SHLIB) $(STATLIB) $(OBJECTS) rust/target diff --git a/src/Makevars.ucrt b/src/Makevars.ucrt new file mode 100644 index 0000000..17b153e --- /dev/null +++ b/src/Makevars.ucrt @@ -0,0 +1,5 @@ +# Rtools42 doesn't have the linker in the location that cargo expects, so we +# need to overwrite it via configuration. +CARGO_LINKER = x86_64-w64-mingw32.static.posix-gcc.exe + +include Makevars.win diff --git a/src/Makevars.win b/src/Makevars.win new file mode 100644 index 0000000..717c761 --- /dev/null +++ b/src/Makevars.win @@ -0,0 +1,40 @@ +TARGET = $(subst 64,x86_64,$(subst 32,i686,$(WIN)))-pc-windows-gnu + +TARGET_DIR = ./rust/target +LIBDIR = $(TARGET_DIR)/$(TARGET)/release +STATLIB = $(LIBDIR)/libWebGestaltR.a +PKG_LIBS = -L$(LIBDIR) -lWebGestaltR -lws2_32 -ladvapi32 -luserenv -lbcrypt -lntdll + +all: C_clean + +$(SHLIB): $(STATLIB) + +CARGOTMP = $(CURDIR)/.cargo + +$(STATLIB): + mkdir -p $(TARGET_DIR)/libgcc_mock + # `rustc` adds `-lgcc_eh` flags to the compiler, but Rtools' GCC doesn't have + # `libgcc_eh` due to the compilation settings. So, in order to please the + # compiler, we need to add empty `libgcc_eh` to the library search paths. + # + # For more details, please refer to + # https://github.com/r-windows/rtools-packages/blob/2407b23f1e0925bbb20a4162c963600105236318/mingw-w64-gcc/PKGBUILD#L313-L316 + touch $(TARGET_DIR)/libgcc_mock/libgcc_eh.a + + # CARGO_LINKER is provided in Makevars.ucrt for R >= 4.2 + if [ "$(NOT_CRAN)" != "true" ]; then \ + export CARGO_HOME=$(CARGOTMP); \ + fi && \ + export CARGO_TARGET_X86_64_PC_WINDOWS_GNU_LINKER="$(CARGO_LINKER)" && \ + export LIBRARY_PATH="$${LIBRARY_PATH};$(CURDIR)/$(TARGET_DIR)/libgcc_mock" && \ + cargo build --target=$(TARGET) --lib --release --manifest-path=./rust/Cargo.toml --target-dir $(TARGET_DIR) + if [ "$(NOT_CRAN)" != "true" ]; then \ + rm -Rf $(CARGOTMP) && \ + rm -Rf $(LIBDIR)/build; \ + fi + +C_clean: + rm -Rf $(SHLIB) $(STATLIB) $(OBJECTS) + +clean: + rm -Rf $(SHLIB) $(STATLIB) $(OBJECTS) $(TARGET_DIR) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 939e9b8..01c0dc5 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -37,9 +37,12 @@ BEGIN_RCPP END_RCPP } +RcppExport SEXP wrap__hello_world(); + static const R_CallMethodDef CallEntries[] = { {"_WebGestaltR_fillInputDataFrame", (DL_FUNC) &_WebGestaltR_fillInputDataFrame, 3}, {"_WebGestaltR_gseaPermutation", (DL_FUNC) &_WebGestaltR_gseaPermutation, 3}, + {"wrap__hello_world", (DL_FUNC) &wrap__hello_world, 0}, {NULL, NULL, 0} }; diff --git a/src/WebGestaltR-win.def b/src/WebGestaltR-win.def new file mode 100644 index 0000000..aacbc61 --- /dev/null +++ b/src/WebGestaltR-win.def @@ -0,0 +1,2 @@ +EXPORTS +R_init_WebGestaltR diff --git a/src/entrypoint.c b/src/entrypoint.c new file mode 100644 index 0000000..164c182 --- /dev/null +++ b/src/entrypoint.c @@ -0,0 +1,8 @@ +// We need to forward routine registration from C to Rust +// to avoid the linker removing the static library. + +void R_init_WebGestaltR_extendr(void *dll); + +void R_init_WebGestaltR(void *dll) { + R_init_WebGestaltR_extendr(dll); +} diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock new file mode 100644 index 0000000..5a5693d --- /dev/null +++ b/src/rust/Cargo.lock @@ -0,0 +1,526 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "WebGestaltR" +version = "0.1.0" +dependencies = [ + "extendr-api", + "webgestalt_lib", +] + +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bytemuck" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crossbeam-channel" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "csv" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "extendr-api" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e36d66fa307948c291a6fc5b09d8295dd58e88ab5e8d782d30e23670113e9ab" +dependencies = [ + "extendr-engine", + "extendr-macros", + "lazy_static", + "libR-sys", + "paste", +] + +[[package]] +name = "extendr-engine" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8298d5a2e38bb91820b92bbd7e5aaf1d3b95ed9f096fc66393c50af38ff8155d" +dependencies = [ + "libR-sys", +] + +[[package]] +name = "extendr-macros" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09bf0849f0d48209be8163378248137fed5ccb5f464d171cf93a19f31a9e6c67" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "hermit-abi" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libR-sys" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd728a97b9b0975f546bc865a7413e0ce6f98a8f6cea52e77dc5ee0bcea00adf" + +[[package]] +name = "libc" +version = "0.2.148" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" + +[[package]] +name = "libm" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" + +[[package]] +name = "matrixmultiply" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "090126dc04f95dc0d1c1c91f61bdd474b3930ca064c1edc8a849da2c6cbe1e77" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "memchr" +version = "2.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" + +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + +[[package]] +name = "nalgebra" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d506eb7e08d6329505faa8a3a00a5dcc6de9f76e0c77e4b75763ae3c770831ff" +dependencies = [ + "approx", + "matrixmultiply", + "nalgebra-macros", + "num-complex", + "num-rational", + "num-traits", + "rand", + "rand_distr", + "simba", + "typenum", +] + +[[package]] +name = "nalgebra-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "num-complex" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro2" +version = "1.0.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "rayon" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + +[[package]] +name = "safe_arch" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f398075ce1e6a179b46f51bd88d0598b92b00d3551f1a2d4ac49e771b56ac354" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "simba" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0b7840f121a46d63066ee7a99fc81dcabbc6105e437cae43528cea199b5a05f" +dependencies = [ + "approx", + "num-complex", + "num-traits", + "paste", + "wide", +] + +[[package]] +name = "statrs" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d08e5e1748192713cc281da8b16924fb46be7b0c2431854eadc785823e5696e" +dependencies = [ + "approx", + "lazy_static", + "nalgebra", + "num-traits", + "rand", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "webgestalt_lib" +version = "0.1.0" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git#b8272997030eab7c902dd355a58b737b7542ffc7" +dependencies = [ + "csv", + "rand", + "rayon", + "rustc-hash", + "serde", + "statrs", +] + +[[package]] +name = "wide" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa469ffa65ef7e0ba0f164183697b89b854253fd31aeb92358b7b6155177d62f" +dependencies = [ + "bytemuck", + "safe_arch", +] diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml new file mode 100644 index 0000000..2fb08ca --- /dev/null +++ b/src/rust/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = 'WebGestaltR' +version = '0.1.0' +edition = '2021' + +[lib] +crate-type = [ 'staticlib' ] +name = 'WebGestaltR' + +[dependencies] +extendr-api = '0.4.0' +webgestalt_lib = {git = "https://github.com/bzhanglab/webgestalt_rust.git"} diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs new file mode 100644 index 0000000..1b2ec30 --- /dev/null +++ b/src/rust/src/lib.rs @@ -0,0 +1,24 @@ +use extendr_api::prelude::*; +use webgestalt_lib::methods::*; + +/// Return string `"Hello world!"` to R. +/// @export +#[extendr] +fn hello_world() -> &'static str { + "Hello world!" +} + +/// Run GSEA using rust library +/// @export +#[extendr] +fn gsea_rust() -> () { + // webgestalt_lib::methods::gsea:: +} + +// Macro to generate exports. +// This ensures exported functions are registered with R. +// See corresponding C code in `entrypoint.c`. +extendr_module! { + mod WebGestaltR; + fn hello_world; +} From 9105be24f08825277a8a8f5236266c1447526e8a Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Fri, 6 Oct 2023 16:38:22 -0500 Subject: [PATCH 16/82] add rewrite of fill_input_matrix fix typo --- DESCRIPTION | 1 + R/swGsea.R | 574 ++++++++++++++++++++++---------------------- README.md | 6 + src/rust/Cargo.lock | 47 ++-- src/rust/Cargo.toml | 4 +- src/rust/src/lib.rs | 50 +++- 6 files changed, 377 insertions(+), 305 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 17c9e16..274b573 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,4 +22,5 @@ Imports: methods, dplyr, doRNG, readr, parallel (>= 3.3.2), NeedsCompilation: yes LinkingTo: Rcpp RoxygenNote: 7.2.3 +SystemRequirements: Cargo (Rust's package manager), rustc v1.66.0 Config/rextendr/version: 0.3.1 diff --git a/R/swGsea.R b/R/swGsea.R index 94940f3..8f3a789 100644 --- a/R/swGsea.R +++ b/R/swGsea.R @@ -74,307 +74,315 @@ #' @export #' @author Eric Jaehnig #' -swGsea <- function(input_df, thresh_type="percentile", thresh=0.9, thresh_action="exclude", min_set_size=10, max_set_size=500, max_score="max", min_score="min", psuedocount=0.001, perms=1000, p=1, q=1, nThreads=1, rng_seed=1, fork=FALSE){ - # check input parameters - if(thresh_type!="percentile"&thresh_type!="list"&thresh_type!="val"&thresh_type!="values"){ - stop("invalid thresh_type specified; needs to be set to 'percentile' to include all scores over that percentile (i.e., 0.9 would be all items in 90th percentile, or top 10 percent) 'list' to include a list of set lists where the set lists are in the same order as the corresponding set columns in the input_df, 'val' to apply a single threshold value to all sets, or 'values' to use a vector of unique cutoffs for each set (needs to be in the same order as the sets are specified in the columns of input_df") - } - if(thresh_action!="exclude"&thresh_action!="include"&thresh_action!="adjust"){ - stop("invalid thresh_action specified; needs to be set to 'exclude' to skip set if it contains no items after applying score threshold (or contains all items), or 'include' to include values for the set at the end of the results (ES and NES automatically set to 0 and pval to 1) or 'adjust' to adjust threshold to add at least min_set_size items below thresh to set (or remove all items equal to the minimum set score value from the set)") - } - if(min_set_size<3){ - stop("please set 'min_set_size' to 3 or greater (default is 5)") - } - if(max_score!="max"){ - if(length(max_score)!=(ncol(input_df)-2)|(!is.numeric(max_score))){ - stop("max_score needs to be set to max or contain a numeric vector of maximum scores for each set") - } - } - if(min_score!="min"){ - if(length(min_score)==(ncol(input_df)-2)|(!is.numeric(min_score))){ - stop("min_score needs to be set to min or contain a numeric vector of minimum scores for each set") - } - } - if(!psuedocount>0){ - stop("psuedocount must be greater than 0") - } - pc <- psuedocount +swGsea <- function(input_df, thresh_type = "percentile", thresh = 0.9, thresh_action = "exclude", min_set_size = 10, max_set_size = 500, max_score = "max", min_score = "min", psuedocount = 0.001, perms = 1000, p = 1, q = 1, nThreads = 1, rng_seed = 1, fork = FALSE) { + # check input parameters + if (thresh_type != "percentile" & thresh_type != "list" & thresh_type != "val" & thresh_type != "values") { + stop("invalid thresh_type specified; needs to be set to 'percentile' to include all scores over that percentile (i.e., 0.9 would be all items in 90th percentile, or top 10 percent) 'list' to include a list of set lists where the set lists are in the same order as the corresponding set columns in the input_df, 'val' to apply a single threshold value to all sets, or 'values' to use a vector of unique cutoffs for each set (needs to be in the same order as the sets are specified in the columns of input_df") + } + if (thresh_action != "exclude" & thresh_action != "include" & thresh_action != "adjust") { + stop("invalid thresh_action specified; needs to be set to 'exclude' to skip set if it contains no items after applying score threshold (or contains all items), or 'include' to include values for the set at the end of the results (ES and NES automatically set to 0 and pval to 1) or 'adjust' to adjust threshold to add at least min_set_size items below thresh to set (or remove all items equal to the minimum set score value from the set)") + } + if (min_set_size < 3) { + stop("please set 'min_set_size' to 3 or greater (default is 5)") + } + if (max_score != "max") { + if (length(max_score) != (ncol(input_df) - 2) | (!is.numeric(max_score))) { + stop("max_score needs to be set to max or contain a numeric vector of maximum scores for each set") + } + } + if (min_score != "min") { + if (length(min_score) == (ncol(input_df) - 2) | (!is.numeric(min_score))) { + stop("min_score needs to be set to min or contain a numeric vector of minimum scores for each set") + } + } + if (!psuedocount > 0) { + stop("psuedocount must be greater than 0") + } + pc <- psuedocount - # re-order by log ratios (rank order from highest to lowest) - expt <- colnames(input_df)[2] - enr_test <- colnames(input_df)[3:ncol(input_df)] - colnames(input_df)[c(1,2)] <- c("item", "expression_val") - input_df <- arrange(input_df, desc(.data$expression_val)) + # re-order by log ratios (rank order from highest to lowest) + expt <- colnames(input_df)[2] + enr_test <- colnames(input_df)[3:ncol(input_df)] + colnames(input_df)[c(1, 2)] <- c("item", "expression_val") + input_df <- arrange(input_df, desc(.data$expression_val)) - # get and check size of set items; build in-set matrix of 1's for items in set and 0's for items not in set - inset_mat <- matrix(0, nrow = length(input_df$item), ncol = length(enr_test)) - dimnames(inset_mat) <- list(input_df$item, enr_test) - # if list of items provided for each set, check to make sure each item is in dataset and set inset_mat to 1 if it is and 0 if not - if(thresh_type=="list"&is.list(thresh)){ - thresh_action <- "exclude" - for(a in 1:length(thresh)){ - skip = "" - if(length(setdiff(thresh[[a]], input_df$item)) > 0){ - skip <- setdiff(thresh[[a]], input_df$item) - warning(paste0(paste(skip, collapse = ", "), " are not items in the input dataframe and will be skipped.\n")) - } - for(b in 1:length(thresh[[a]])){ - if(!thresh[[a]][b] %in% skip){ - inset_mat[thresh[[a]][b], a] <- 1 - } - } - } - # if numeric threshold provided, set inset_mat to 1 for items that meet threshold or 0 for those that don't - } else if(is.numeric(thresh)){ - if(thresh_type=="values" & length(thresh)==length(enr_test)){ - for(a in 1:length(thresh)){ - items_in_set <- input_df$item[input_df[ , enr_test[a]]>=thresh[a]] - inset_mat[items_in_set, enr_test[a]] <- 1 - } - } else if(thresh_type=="percentile" & thresh>0 & thresh<1){ - thresh1 <- vector(mode = "numeric", length = length(enr_test)) - for(a in 1:length(enr_test)){ - thresh1[a] = quantile(input_df[ , enr_test[a]], probs = thresh) - items_in_set <- input_df$item[input_df[ , enr_test[a]]>=thresh1[a]] - inset_mat[items_in_set, enr_test[a]] <- 1 - } - thresh <- thresh1 - } else if(thresh_type=="val" & length(thresh)==1){ - for(a in 1:ncol(inset_mat)){ - items_in_set <- input_df$item[input_df[ , enr_test[a]]>=thresh] - inset_mat[items_in_set, enr_test[a]] <- 1 - } - thresh <- rep(thresh, times=ncol(inset_mat)) - } - } else { stop("improper threshold specified") } + # get and check size of set items; build in-set matrix of 1's for items in set and 0's for items not in set + inset_mat <- matrix(0, nrow = length(input_df$item), ncol = length(enr_test)) + dimnames(inset_mat) <- list(input_df$item, enr_test) + # if list of items provided for each set, check to make sure each item is in dataset and set inset_mat to 1 if it is and 0 if not + if (thresh_type == "list" & is.list(thresh)) { + thresh_action <- "exclude" + for (a in 1:length(thresh)) { + skip <- "" + if (length(setdiff(thresh[[a]], input_df$item)) > 0) { + skip <- setdiff(thresh[[a]], input_df$item) + warning(paste0(paste(skip, collapse = ", "), " are not items in the input dataframe and will be skipped.\n")) + } + for (b in seq_along(thresh[[a]])) { + if (!thresh[[a]][b] %in% skip) { + inset_mat[thresh[[a]][b], a] <- 1 + } + } + } + # if numeric threshold provided, set inset_mat to 1 for items that meet threshold or 0 for those that don't + } else if (is.numeric(thresh)) { + if (thresh_type == "values" & length(thresh) == length(enr_test)) { + for (a in 1:length(thresh)) { + items_in_set <- input_df$item[input_df[, enr_test[a]] >= thresh[a]] + inset_mat[items_in_set, enr_test[a]] <- 1 + } + } else if (thresh_type == "percentile" & thresh > 0 & thresh < 1) { + thresh1 <- vector(mode = "numeric", length = length(enr_test)) + for (a in 1:length(enr_test)) { + thresh1[a] <- quantile(input_df[, enr_test[a]], probs = thresh) + items_in_set <- input_df$item[input_df[, enr_test[a]] >= thresh1[a]] + inset_mat[items_in_set, enr_test[a]] <- 1 + } + thresh <- thresh1 + } else if (thresh_type == "val" & length(thresh) == 1) { + for (a in 1:ncol(inset_mat)) { + items_in_set <- input_df$item[input_df[, enr_test[a]] >= thresh] + inset_mat[items_in_set, enr_test[a]] <- 1 + } + thresh <- rep(thresh, times = ncol(inset_mat)) + } + } else { + stop("improper threshold specified") + } - # check to make sure minimum number of items present in each set; if not, skip or adjust according to thresh_action - skipped_sets <- character(0) - for(c in 1:ncol(inset_mat)){ - if(sum(inset_mat[ , c]) < min_set_size){ - too_small <- colnames(inset_mat)[c] - warning(paste0(expt, " does not contain minimum number of items in set for ", too_small, "\n")) - if(thresh_action=="exclude"|thresh_action=="include"){ - skipped_sets <- c(skipped_sets, too_small) - } else { - # lower threshold to first value that would include minimum number of items in set - lower_scores <- sort(input_df[input_df[too_small]=new_thresh, c] <- 1 - # check to make sure the set meets size requirements after applying new threshold, if not exclude set - if((sum(inset_mat[ , c])= new_thresh, c] <- 1 + # check to make sure the set meets size requirements after applying new threshold, if not exclude set + if ((sum(inset_mat[, c]) < min_set_size) | (sum(inset_mat[, c]) == length(input_df$item))) { + skipped_sets <- c(skipped_sets, too_small) + warning(paste0("cannot adjust threshold for ", too_small, " to meet size requirements for analysis in ", expt), "; skipping set\n") + } + } + } + } - # now check to make sure each set contains fewer items than the max_set_size threshold (default=500); adjust score threshold to obtain valid set size or skip according to thresh_action - for(c in 1:ncol(inset_mat)){ - check_col <- colnames(inset_mat)[c] - if(sum(inset_mat[ , c]) > max_set_size){ - warning(paste0(expt, " has more than ", max_set_size, " items in set ", check_col, "\n")) - if(thresh_action=="exclude"|thresh_action=="include"){ - skipped_sets <- c(skipped_sets, check_col) - } else { - # set items with minimum value to 0 to reduce set size (note: while loop not necessary here because max set size is all items in the dataset, but may be useful if we decide to impose a maximum set size later) - while(sum(inset_mat[ , c]) > max_set_size){ - inset_mat[input_df$item[input_df[, check_col] == min(input_df[inset_mat[ , c]==1, check_col])], check_col] <- 0 - } - # make sure this adjustment didn't remove too many items - if(sum(inset_mat[ , c]) max_set_size) { + warning(paste0(expt, " has more than ", max_set_size, " items in set ", check_col, "\n")) + if (thresh_action == "exclude" | thresh_action == "include") { + skipped_sets <- c(skipped_sets, check_col) + } else { + # set items with minimum value to 0 to reduce set size (note: while loop not necessary here because max set size is all items in the dataset, but may be useful if we decide to impose a maximum set size later) + while (sum(inset_mat[, c]) > max_set_size) { + inset_mat[input_df$item[input_df[, check_col] == min(input_df[inset_mat[, c] == 1, check_col])], check_col] <- 0 + } + # make sure this adjustment didn't remove too many items + if (sum(inset_mat[, c]) < min_set_size) { + skipped_sets <- c(skipped_sets, check_col) + warning(paste0("cannot adjust threshold for ", check_col, " to meet size requirements for analysis in ", expt), "; skipping set\n") + } + } + } + } - # remove skipped columns - # print(c("sets that don't contain proper number of items:", skipped_sets)) - inset_mat <- inset_mat[ , !(colnames(inset_mat)%in%skipped_sets)] + # remove skipped columns + # print(c("sets that don't contain proper number of items:", skipped_sets)) + inset_mat <- inset_mat[, !(colnames(inset_mat) %in% skipped_sets)] - if (is.null(ncol(inset_mat))) { - stop("All gene sets are skipped! Please try to descrease the minimum set size.\n") - } + if (is.null(ncol(inset_mat))) { + stop("All gene sets are skipped! Please try to decrease the minimum set size.\n") + } - # generate list containing names of items in each set and ranks of those items - items_in_set <- list() - for(it in 1:ncol(inset_mat)){ - items_in_set[[colnames(inset_mat)[it]]] <- data.frame(which(inset_mat[ , it]==1), stringsAsFactors = F) - colnames(items_in_set[[colnames(inset_mat)[it]]]) = "rank" - } + # generate list containing names of items in each set and ranks of those items + items_in_set <- list() + for (it in 1:ncol(inset_mat)) { + items_in_set[[colnames(inset_mat)[it]]] <- data.frame(which(inset_mat[, it] == 1), stringsAsFactors = F) + colnames(items_in_set[[colnames(inset_mat)[it]]]) <- "rank" + } - # adjust set scores to range from a minimum of 0+pc to a maximum of 1 (s=(Score-minScore+pc)/(maxScore-minScore+pc)); calculate s^q * |r|^p score for each item in each set; get sum of these scores for each set - if(max_score=="max"){ - max_scores <- vector(mode = "numeric", length = ncol(inset_mat)) - } else { max_scores <- max_score } - if(min_score=="min"){ - min_scores <- vector(mode = "numeric", length = ncol(inset_mat)) - } else { min_scores <- min_score } - scaled_scores <- inset_mat - set_scores <- inset_mat * input_df[ , colnames(inset_mat)] - set_tot <- vector(mode = "numeric", length = ncol(inset_mat)) - adj_expr_val <- (abs(input_df$expression_val))^p - for(d in 1:ncol(set_scores)){ - if(max_score=="max"){ - max_scores[d] <- max(set_scores[ , d]) - } - if(min_score=="min"){ - min_scores[d] <- min(set_scores[set_scores[ , d]>0, d]) - } - set_scores[set_scores[ , d] > max_scores[d], d] <- max_scores[d] - set_scores[set_scores[ , d] < min_scores[d], d] <- min_scores[d] + # adjust set scores to range from a minimum of 0+pc to a maximum of 1 (s=(Score-minScore+pc)/(maxScore-minScore+pc)); calculate s^q * |r|^p score for each item in each set; get sum of these scores for each set + if (max_score == "max") { + max_scores <- vector(mode = "numeric", length = ncol(inset_mat)) + } else { + max_scores <- max_score + } + if (min_score == "min") { + min_scores <- vector(mode = "numeric", length = ncol(inset_mat)) + } else { + min_scores <- min_score + } + scaled_scores <- inset_mat + set_scores <- inset_mat * input_df[, colnames(inset_mat)] + set_tot <- vector(mode = "numeric", length = ncol(inset_mat)) + adj_expr_val <- (abs(input_df$expression_val))^p + for (d in 1:ncol(set_scores)) { + if (max_score == "max") { + max_scores[d] <- max(set_scores[, d]) + } + if (min_score == "min") { + min_scores[d] <- min(set_scores[set_scores[, d] > 0, d]) + } + set_scores[set_scores[, d] > max_scores[d], d] <- max_scores[d] + set_scores[set_scores[, d] < min_scores[d], d] <- min_scores[d] - # if the max set score equals the minimum set score, all scores for items in set will be 1; therefore, let user know that this analysis now reverts to standard GSEA - # if(max_scores[d]==min_scores[d]){ - # print(paste0("max score for set ", colnames(inset_mat)[d], " equals min score; results will be equivalent to standard GSEA")) - # } + # if the max set score equals the minimum set score, all scores for items in set will be 1; therefore, let user know that this analysis now reverts to standard GSEA + # if(max_scores[d]==min_scores[d]){ + # print(paste0("max score for set ", colnames(inset_mat)[d], " equals min score; results will be equivalent to standard GSEA")) + # } - # scales scores for sets - scaled_scores[ , d] <- ((set_scores[ , d] - min_scores[d] + pc)/(max_scores[d] - min_scores[d] + pc))^q - # multiple by inset_mat to set scores for items not in set to 0 - scaled_scores[ , d] <- scaled_scores[ ,d ] * inset_mat[ , d] - set_scores[ , d] <- scaled_scores[ , d] * adj_expr_val - if (sum(set_scores[ , d]) == 0) { - set_scores[ , d] <- scaled_scores[ , d] - } - set_tot[d] <- sum(set_scores[ , d]) - } + # scales scores for sets + scaled_scores[, d] <- ((set_scores[, d] - min_scores[d] + pc) / (max_scores[d] - min_scores[d] + pc))^q + # multiple by inset_mat to set scores for items not in set to 0 + scaled_scores[, d] <- scaled_scores[, d] * inset_mat[, d] + set_scores[, d] <- scaled_scores[, d] * adj_expr_val + if (sum(set_scores[, d]) == 0) { + set_scores[, d] <- scaled_scores[, d] + } + set_tot[d] <- sum(set_scores[, d]) + } - # get set of items not in in_set - outset_mat <- 1 - inset_mat + # get set of items not in in_set + outset_mat <- 1 - inset_mat - # convert to vectors to matrices with same dimensions as inset_mat to use to calculate Running_Sum and for permutations below - # expr_mat <- matrix(rep(input_df$expression_val, times = ncol(inset_mat)), nrow = nrow(input_df), dimnames = list(rownames(inset_mat), colnames(inset_mat))) - set_tot <- t(matrix(rep(set_tot, times=nrow(inset_mat)), nrow=length(set_tot), dimnames=list(colnames(inset_mat), rownames(inset_mat)))) - outset_mat_sums <- t(matrix(rep(colSums(outset_mat), times=nrow(outset_mat)), nrow = ncol(outset_mat), dimnames = list(colnames(outset_mat), rownames(outset_mat)))) - outset_scores <- outset_mat/outset_mat_sums - scores_mat <- (set_scores / set_tot) - outset_scores + # convert to vectors to matrices with same dimensions as inset_mat to use to calculate Running_Sum and for permutations below + # expr_mat <- matrix(rep(input_df$expression_val, times = ncol(inset_mat)), nrow = nrow(input_df), dimnames = list(rownames(inset_mat), colnames(inset_mat))) + set_tot <- t(matrix(rep(set_tot, times = nrow(inset_mat)), nrow = length(set_tot), dimnames = list(colnames(inset_mat), rownames(inset_mat)))) + outset_mat_sums <- t(matrix(rep(colSums(outset_mat), times = nrow(outset_mat)), nrow = ncol(outset_mat), dimnames = list(colnames(outset_mat), rownames(outset_mat)))) + outset_scores <- outset_mat / outset_mat_sums + scores_mat <- (set_scores / set_tot) - outset_scores - # walk through ranked list and tally running total; also keep track of maximum and minimum values - Running_Sum <- matrix(0, ncol=ncol(scores_mat), nrow = nrow(scores_mat), dimnames = dimnames(scores_mat)) - running_max <- vector(mode = "numeric", length = ncol(scores_mat)) - running_min <- vector(mode = "numeric", length = ncol(scores_mat)) - for(e in 1:ncol(Running_Sum)){ - Running_Sum[ , e] <- cumsum(scores_mat[ , e]) - running_max[e] <- max(Running_Sum[ , e]) - running_min[e] <- min(Running_Sum[ , e]) - } + # walk through ranked list and tally running total; also keep track of maximum and minimum values + Running_Sum <- matrix(0, ncol = ncol(scores_mat), nrow = nrow(scores_mat), dimnames = dimnames(scores_mat)) + running_max <- vector(mode = "numeric", length = ncol(scores_mat)) + running_min <- vector(mode = "numeric", length = ncol(scores_mat)) + for (e in 1:ncol(Running_Sum)) { + Running_Sum[, e] <- cumsum(scores_mat[, e]) + running_max[e] <- max(Running_Sum[, e]) + running_min[e] <- min(Running_Sum[, e]) + } - # permute df 1000x and use to determine p-value of max/min score from previous section - set.seed(rng_seed) - # FORK is an option for Unix machines to reduce memory footprint - if(fork==T) { - cl <- makeCluster(nThreads, type = "FORK") - } else { cl <- makeCluster(nThreads) } - registerDoParallel(cl) - # use dorng instead of dopar to properly pass rng seed to foreach loop - rand_stats <- foreach(i=1:perms, .combine = 'rbind') %dorng% { - gseaPermutation(scaled_scores, outset_scores, adj_expr_val) + # permute df 1000x and use to determine p-value of max/min score from previous section + set.seed(rng_seed) + # FORK is an option for Unix machines to reduce memory footprint + if (fork == T) { + cl <- makeCluster(nThreads, type = "FORK") + } else { + cl <- makeCluster(nThreads) + } + registerDoParallel(cl) + # use dorng instead of dopar to properly pass rng seed to foreach loop + rand_stats <- foreach(i = 1:perms, .combine = "rbind") %dorng% { + gseaPermutation(scaled_scores, outset_scores, adj_expr_val) - ## R implementation of permutations - # - # rand_df <- input_df[sample(nrow(input_df)), , drop = F] - # rand_df$expression_val <- input_df$expression_val - # rand_df <- rand_df[ , c("item","expression_val", colnames(inset_mat)), drop = F] - # rand_scaled_scores <- scaled_scores[rand_df$item, , drop=F] - # rand_outset_scores <- outset_scores[rand_df$item, , drop=F] - # - # rand_adj_scores <- rand_scaled_scores * (abs(expr_mat)^p) - # - # rand_set_tot <- colSums(rand_adj_scores) - # rand_set_tot <- t(matrix(rep(rand_set_tot, times = nrow(rand_adj_scores)), nrow = length(rand_set_tot), dimnames = list(colnames(rand_adj_scores), rownames(rand_adj_scores)))) - # rand_scores <- (rand_adj_scores / (rand_set_tot + 0.000001)) - rand_outset_scores - # - # rand_tot <- matrix(0, nrow = nrow(rand_scores), ncol = ncol(rand_scores)) - # rand_max <- numeric(ncol(inset_mat)) - # rand_min <- numeric(ncol(inset_mat)) - # rand_best <- numeric(ncol(inset_mat)) - # for(j in 1:ncol(rand_tot)){ - # rand_tot[ , j] <- cumsum(rand_scores[ , j]) - # rand_max[j] <- max(rand_tot[ , j]) - # rand_min[j] <- min(rand_tot[ , j]) - # if(rand_max[j] >= abs(rand_min[j])){ - # rand_best[j] <- rand_max[j] - # } else { rand_best[j] <- rand_min[j] } - # } - # c(rand_min, rand_max, rand_best) - } - stopCluster(cl) - cat(paste0(perms, " permutations of ", expt, " complete...\n")) + ## R implementation of permutations + # + # rand_df <- input_df[sample(nrow(input_df)), , drop = F] + # rand_df$expression_val <- input_df$expression_val + # rand_df <- rand_df[ , c("item","expression_val", colnames(inset_mat)), drop = F] + # rand_scaled_scores <- scaled_scores[rand_df$item, , drop=F] + # rand_outset_scores <- outset_scores[rand_df$item, , drop=F] + # + # rand_adj_scores <- rand_scaled_scores * (abs(expr_mat)^p) + # + # rand_set_tot <- colSums(rand_adj_scores) + # rand_set_tot <- t(matrix(rep(rand_set_tot, times = nrow(rand_adj_scores)), nrow = length(rand_set_tot), dimnames = list(colnames(rand_adj_scores), rownames(rand_adj_scores)))) + # rand_scores <- (rand_adj_scores / (rand_set_tot + 0.000001)) - rand_outset_scores + # + # rand_tot <- matrix(0, nrow = nrow(rand_scores), ncol = ncol(rand_scores)) + # rand_max <- numeric(ncol(inset_mat)) + # rand_min <- numeric(ncol(inset_mat)) + # rand_best <- numeric(ncol(inset_mat)) + # for(j in 1:ncol(rand_tot)){ + # rand_tot[ , j] <- cumsum(rand_scores[ , j]) + # rand_max[j] <- max(rand_tot[ , j]) + # rand_min[j] <- min(rand_tot[ , j]) + # if(rand_max[j] >= abs(rand_min[j])){ + # rand_best[j] <- rand_max[j] + # } else { rand_best[j] <- rand_min[j] } + # } + # c(rand_min, rand_max, rand_best) + } + stopCluster(cl) + cat(paste0(perms, " permutations of ", expt, " complete...\n")) - # split output from permutations into iteration by set dataframes for random running totals, random maxes, and random mins - rand_mins <- rand_stats[ , 1:ncol(inset_mat)] - colnames(rand_mins) <- colnames(inset_mat) - rand_maxes <- rand_stats[ , (ncol(inset_mat)+1):(2*ncol(inset_mat))] - colnames(rand_maxes) <- colnames(inset_mat) - rand_best <- rand_stats[ , (2*ncol(inset_mat)+1):ncol(rand_stats)] - colnames(rand_best) <- colnames(inset_mat) + # split output from permutations into iteration by set dataframes for random running totals, random maxes, and random mins + rand_mins <- rand_stats[, 1:ncol(inset_mat)] + colnames(rand_mins) <- colnames(inset_mat) + rand_maxes <- rand_stats[, (ncol(inset_mat) + 1):(2 * ncol(inset_mat))] + colnames(rand_maxes) <- colnames(inset_mat) + rand_best <- rand_stats[, (2 * ncol(inset_mat) + 1):ncol(rand_stats)] + colnames(rand_best) <- colnames(inset_mat) - # calculate max and min NES for each set - NES_max <- running_max/(colSums((rand_best>=0)*rand_best)/(colSums(rand_best>=0) + 0.000001) + 0.000001) - NES_min <- running_min/(colSums(abs(rand_best)*(rand_best<=0))/(colSums(rand_best<=0) + 0.000001) + 0.000001) - NES_max <- NES_max * (colSums(rand_best>=0) > 0) - NES_min <- NES_min * (colSums(rand_best<=0) > 0) + # calculate max and min NES for each set + NES_max <- running_max / (colSums((rand_best >= 0) * rand_best) / (colSums(rand_best >= 0) + 0.000001) + 0.000001) + NES_min <- running_min / (colSums(abs(rand_best) * (rand_best <= 0)) / (colSums(rand_best <= 0) + 0.000001) + 0.000001) + NES_max <- NES_max * (colSums(rand_best >= 0) > 0) + NES_min <- NES_min * (colSums(rand_best <= 0) > 0) - pval_max <- colSums(t(t(rand_best) >= running_max))/(colSums(rand_best >= 0) + 0.000001) - pval_min <- colSums(t(t(rand_best) <= running_min))/(colSums(rand_best <= 0) + 0.000001) + pval_max <- colSums(t(t(rand_best) >= running_max)) / (colSums(rand_best >= 0) + 0.000001) + pval_min <- colSums(t(t(rand_best) <= running_min)) / (colSums(rand_best <= 0) + 0.000001) - rand_mins_NES <- t((t(rand_mins)/(colSums(abs(rand_best)*(rand_best<=0))/(colSums(rand_best<=0) +0.000001) + 0.000001)) * (colSums(rand_best<=0) > 0)) - rand_maxes_NES <- t((t(rand_maxes)/(colSums(rand_best*(rand_best>=0))/(colSums(rand_best>=0) + 0.000001) + 0.000001)) * (colSums(rand_best>=0) > 0)) - #rand_mins_NES <- rand_mins_NES * (colSums(rand_best>=0) > 0) - #rand_maxes_NES <- rand_maxes_NES * (colSums(rand_best<=0) > 0) + rand_mins_NES <- t((t(rand_mins) / (colSums(abs(rand_best) * (rand_best <= 0)) / (colSums(rand_best <= 0) + 0.000001) + 0.000001)) * (colSums(rand_best <= 0) > 0)) + rand_maxes_NES <- t((t(rand_maxes) / (colSums(rand_best * (rand_best >= 0)) / (colSums(rand_best >= 0) + 0.000001) + 0.000001)) * (colSums(rand_best >= 0) > 0)) + # rand_mins_NES <- rand_mins_NES * (colSums(rand_best>=0) > 0) + # rand_maxes_NES <- rand_maxes_NES * (colSums(rand_best<=0) > 0) - # calculate FDR for each NES - n_all_rand_min <- sum(rand_best<=0) - if(n_all_rand_min==0){ - n_all_rand_min <- 0.000001 - } - n_all_rand_max <- sum(rand_best>=0) - if(n_all_rand_max==0){ - n_all_rand_min <- 0.000001 - } + # calculate FDR for each NES + n_all_rand_min <- sum(rand_best <= 0) + if (n_all_rand_min == 0) { + n_all_rand_min <- 0.000001 + } + n_all_rand_max <- sum(rand_best >= 0) + if (n_all_rand_max == 0) { + n_all_rand_min <- 0.000001 + } - #output_mat <- matrix(0, nrow = ncol(Running_Sum), ncol = 8, dimnames = list(colnames(Running_Sum), c("ES", "NES", "p_val", "fdr", "pos_ES", "neg_ES", "pos_NES", "neg_NES"))) - output_mat <- matrix(0, nrow = ncol(Running_Sum), ncol = 4, dimnames = list(colnames(Running_Sum), c("ES", "NES", "p_val", "fdr"))) - output_df <- data.frame(output_mat, stringsAsFactors = F) + # output_mat <- matrix(0, nrow = ncol(Running_Sum), ncol = 8, dimnames = list(colnames(Running_Sum), c("ES", "NES", "p_val", "fdr", "pos_ES", "neg_ES", "pos_NES", "neg_NES"))) + output_mat <- matrix(0, nrow = ncol(Running_Sum), ncol = 4, dimnames = list(colnames(Running_Sum), c("ES", "NES", "p_val", "fdr"))) + output_df <- data.frame(output_mat, stringsAsFactors = F) - for(k in 1:length(NES_min)){ - #output_df$pos_ES[k] <- running_max[k] - #output_df$neg_ES[k] <- running_min[k] - #output_df$pos_NES[k] <- NES_max[k] - #output_df$neg_NES[k] <- NES_min[k] - if(abs(running_min[k])=0) >= NES_max[k])) / n_all_rand_max - max_bottom <- sum(NES_max[running_max>=abs(running_min)] >= NES_max[k]) / (sum(running_max>=abs(running_min))) - output_df$fdr[k] <- max_top / max_bottom - } else { - output_df$ES[k] <- running_min[k] - output_df$NES[k] <- NES_min[k] - output_df$p_val[k] <- pval_min[k] - min_top <- sum(rowSums(rand_mins_NES*(rand_best<=0) <= NES_min[k])) / n_all_rand_min - min_bottom <- sum(NES_min[abs(running_min)>running_max] <= NES_min[k]) / sum(abs(running_min)>running_max) - output_df$fdr[k] <- min_top / min_bottom - } - } - if((thresh_action=="include")&(length(skipped_sets)>0)){ - new_row <- data.frame(matrix(0, nrow=1, ncol=4), stringsAsFactors = F) - colnames(new_row) <- colnames(output_df) - new_row$p_val <- 1 - new_row$fdr <- 1 - for(i in 1:length(skipped_sets)){ - rownames(new_row) <- skipped_sets[i] - output_df <- rbind(output_df, new_row) - } - } - output_df$fdr[output_df$fdr>1] <- 1 - return(list(Enrichment_Results=output_df, Running_Sums=Running_Sum, Items_in_Set=items_in_set)) + for (k in 1:length(NES_min)) { + # output_df$pos_ES[k] <- running_max[k] + # output_df$neg_ES[k] <- running_min[k] + # output_df$pos_NES[k] <- NES_max[k] + # output_df$neg_NES[k] <- NES_min[k] + if (abs(running_min[k]) < running_max[k]) { + output_df$ES[k] <- running_max[k] + output_df$NES[k] <- NES_max[k] + output_df$p_val[k] <- pval_max[k] + max_top <- sum(rowSums(rand_maxes_NES * (rand_best >= 0) >= NES_max[k])) / n_all_rand_max + max_bottom <- sum(NES_max[running_max >= abs(running_min)] >= NES_max[k]) / (sum(running_max >= abs(running_min))) + output_df$fdr[k] <- max_top / max_bottom + } else { + output_df$ES[k] <- running_min[k] + output_df$NES[k] <- NES_min[k] + output_df$p_val[k] <- pval_min[k] + min_top <- sum(rowSums(rand_mins_NES * (rand_best <= 0) <= NES_min[k])) / n_all_rand_min + min_bottom <- sum(NES_min[abs(running_min) > running_max] <= NES_min[k]) / sum(abs(running_min) > running_max) + output_df$fdr[k] <- min_top / min_bottom + } + } + if ((thresh_action == "include") & (length(skipped_sets) > 0)) { + new_row <- data.frame(matrix(0, nrow = 1, ncol = 4), stringsAsFactors = F) + colnames(new_row) <- colnames(output_df) + new_row$p_val <- 1 + new_row$fdr <- 1 + for (i in 1:length(skipped_sets)) { + rownames(new_row) <- skipped_sets[i] + output_df <- rbind(output_df, new_row) + } + } + output_df$fdr[output_df$fdr > 1] <- 1 + return(list(Enrichment_Results = output_df, Running_Sums = Running_Sum, Items_in_Set = items_in_set)) } @@ -392,8 +400,8 @@ swGsea <- function(input_df, thresh_type="percentile", thresh=0.9, thresh_action #' @export #' prepareGseaInput <- function(rankFile, gmtFile) { - rank <- read_tsv(rankFile, col_names=c("gene", "score"), col_types="cd") - gmt <- readGmt(gmtFile) - inputDf <- prepareInputMatrixGsea(rank, gmt) - return(inputDf) + rank <- read_tsv(rankFile, col_names = c("gene", "score"), col_types = "cd") + gmt <- readGmt(gmtFile) + inputDf <- prepareInputMatrixGsea(rank, gmt) + return(inputDf) } diff --git a/README.md b/README.md index b1c5155..7604500 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ +# WebGestalt + WebGestalt R package is the R version of our well-known web application tool WebGestalt (www.webgestalt.org) that has on average 27,000 users from 140 countries and territories per year and has also been cited 371 in 2016. The advantage of this R package is that it can be easily integrated to other pipelines or simultaneously analyze multiple gene lists. WebGestaltR function can perform popular enrichment analyses: ORA (Over-Representation Analysis), GSEA (Gene Set Enrichment Analysis) and NTA (Network Topology Analysis). Based on the user-uploaded gene list or gene list with scores (for GSEA method), WebGestaltR function will first map the gene list to entrez gene IDs and then summarize the gene list based on the GO (Gene Ontology) Slim data. After performing the enrichment analysis, WebGestaltR function also returns an user-friendly HTML report containing GO Slim summary and enrichment analysis result. If the functional categories have the DAG (directed acyclic graph) structure, the structure of the enriched categories can also be visualized in the report. + +## Installation + +Since WebGestaltR v2.0.0, Rust is used for core computations in the R package. Therefore, to install WebGestaltR, please download and install Rust from [https://www.rust-lang.org/learn/get-started](https://www.rust-lang.org/learn/get-started). For Mac, Linux, or Unix users, Rust can be installed from the command line, and Windows users can download a GUI installer. diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 5a5693d..2ac35b7 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -7,6 +7,8 @@ name = "WebGestaltR" version = "0.1.0" dependencies = [ "extendr-api", + "ndarray", + "rustc-hash", "webgestalt_lib", ] @@ -109,35 +111,25 @@ checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "extendr-api" -version = "0.4.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e36d66fa307948c291a6fc5b09d8295dd58e88ab5e8d782d30e23670113e9ab" +checksum = "c7d55a0174c4df17c467fb59b3f836bec31d1af6d56c4182cfdf34a62d1553a4" dependencies = [ - "extendr-engine", "extendr-macros", - "lazy_static", "libR-sys", + "once_cell", "paste", ] -[[package]] -name = "extendr-engine" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8298d5a2e38bb91820b92bbd7e5aaf1d3b95ed9f096fc66393c50af38ff8155d" -dependencies = [ - "libR-sys", -] - [[package]] name = "extendr-macros" -version = "0.4.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09bf0849f0d48209be8163378248137fed5ccb5f464d171cf93a19f31a9e6c67" +checksum = "33833971650cade4bfa3097b979506bf2b4934b60392e110f95b94c2406cbb84" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.37", ] [[package]] @@ -171,9 +163,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libR-sys" -version = "0.4.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd728a97b9b0975f546bc865a7413e0ce6f98a8f6cea52e77dc5ee0bcea00adf" +checksum = "a34aaa68a201f71eab5df5a67d1326add8aaf029434e939353bcab0534919ff1" [[package]] name = "libc" @@ -241,6 +233,19 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "ndarray" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "rawpointer", +] + [[package]] name = "num-complex" version = "0.4.4" @@ -291,6 +296,12 @@ dependencies = [ "libc", ] +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + [[package]] name = "paste" version = "1.0.14" diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index 2fb08ca..6e83d46 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -8,5 +8,7 @@ crate-type = [ 'staticlib' ] name = 'WebGestaltR' [dependencies] -extendr-api = '0.4.0' +extendr-api = '0.6.0' +ndarray = "0.15.6" +rustc-hash = "1.1.0" webgestalt_lib = {git = "https://github.com/bzhanglab/webgestalt_rust.git"} diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 1b2ec30..1974418 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -1,10 +1,12 @@ use extendr_api::prelude::*; +use extendr_api::wrapper::dataframe::Dataframe; +use ndarray::Array2; +use rustc_hash::FxHashMap; use webgestalt_lib::methods::*; - /// Return string `"Hello world!"` to R. /// @export #[extendr] -fn hello_world() -> &'static str { +fn rust_hello_world() -> &'static str { "Hello world!" } @@ -15,10 +17,52 @@ fn gsea_rust() -> () { // webgestalt_lib::methods::gsea:: } +/// Fill relation data frame for GSEA input +/// +/// Fill 1 for gene in gene set +/// ## Diagram +/// ```shell +/// Gene Sets +/// ┌───────────┐ First column named 'gene' containing gene name +/// │A0100110100│ 1 = in set +/// Genes │B0100101000│ 0 = not in set +/// │C1011101001│ +/// └───────────┘ +/// ``` +/// @param gmt A Data Frame with geneSet and gene columns from the GMT file +/// @param genes A vector of genes +/// @param geneSets A vector of gene sets +/// +/// @return A Data Frame with the first column of gene and 1 or 0 for other columns of gene sets. +/// @author John Elizarraras +/// @keywords internal +/// @export +#[extendr] +pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> Robj { + let genes_vec = genes.as_string_vector().unwrap(); + let mut gene_set_vec = gene_sets.as_string_vector().unwrap(); + let mut value_array = Array2::zeros((genes.len(), gmt.len())); + let mut geneIndex: FxHashMap<&String, usize> = FxHashMap::default(); + let mut setIndex: FxHashMap<&String, usize> = FxHashMap::default(); + let gmtSet: Vec = gmt.index("geneSet").unwrap().as_string_vector().unwrap(); + let gmtGene: Vec = gmt.index("gene").unwrap().as_string_vector().unwrap(); + for (i, val) in genes_vec.iter().enumerate() { + geneIndex.insert(val, i); + } + for (i, val) in gene_set_vec.iter().enumerate() { + setIndex.insert(val, i); + } + for i in 0..gmtSet.len() { + value_array[[geneIndex[&gmtGene[i]], setIndex[&gmtSet[i]]]] = 1; + } + gene_set_vec.insert(0, String::from("gene")); + data_frame!(x = 1) +} + // Macro to generate exports. // This ensures exported functions are registered with R. // See corresponding C code in `entrypoint.c`. extendr_module! { mod WebGestaltR; - fn hello_world; + fn rust_hello_world; } From f8f17cc2cdef673bc775bdbecbe917a4eaadb693 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 9 Oct 2023 11:56:11 -0500 Subject: [PATCH 17/82] adds fill_input_data_frame, but returns list --- src/rust/src/lib.rs | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 1974418..9709ef0 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -17,9 +17,17 @@ fn gsea_rust() -> () { // webgestalt_lib::methods::gsea:: } + +enum SafeTypes { + String(String), + Int(i32) +} /// Fill relation data frame for GSEA input /// /// Fill 1 for gene in gene set +/// +/// See https://github.com/extendr/extendr/issues/612 for how to export DataFrame +/// /// ## Diagram /// ```shell /// Gene Sets @@ -38,25 +46,32 @@ fn gsea_rust() -> () { /// @keywords internal /// @export #[extendr] -pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> Robj { +pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { let genes_vec = genes.as_string_vector().unwrap(); - let mut gene_set_vec = gene_sets.as_string_vector().unwrap(); + let gene_set_vec = gene_sets.as_string_vector().unwrap(); let mut value_array = Array2::zeros((genes.len(), gmt.len())); - let mut geneIndex: FxHashMap<&String, usize> = FxHashMap::default(); - let mut setIndex: FxHashMap<&String, usize> = FxHashMap::default(); - let gmtSet: Vec = gmt.index("geneSet").unwrap().as_string_vector().unwrap(); - let gmtGene: Vec = gmt.index("gene").unwrap().as_string_vector().unwrap(); + let mut gene_index: FxHashMap<&String, usize> = FxHashMap::default(); + let mut set_index: FxHashMap<&String, usize> = FxHashMap::default(); + let gmt_set: Vec = gmt.index("geneSet").unwrap().as_string_vector().unwrap(); + let gmt_gene: Vec = gmt.index("gene").unwrap().as_string_vector().unwrap(); for (i, val) in genes_vec.iter().enumerate() { - geneIndex.insert(val, i); + gene_index.insert(val, i); } for (i, val) in gene_set_vec.iter().enumerate() { - setIndex.insert(val, i); + set_index.insert(val, i); + } + for i in 0..gmt_set.len() { + value_array[[gene_index[&gmt_gene[i]], set_index[&gmt_set[i]]]] = 1; } - for i in 0..gmtSet.len() { - value_array[[geneIndex[&gmtGene[i]], setIndex[&gmtSet[i]]]] = 1; + let mut gene_set_val: Vec> = Vec::new(); + // gene_set_val.push(genes_vec.into_iter().map(|x| SafeTypes::String(x)).collect()); + for i in 0..gmt_set.len() { + gene_set_val.push(value_array.column(i).to_vec()) } - gene_set_vec.insert(0, String::from("gene")); - data_frame!(x = 1) + // gene_set_vec.insert(0, String::from("gene")); + // Construct DataFrame in R. Create list for now. + List::from_names_and_values(gene_set_vec, gene_set_val).unwrap() + // data_frame!(x = 1) } // Macro to generate exports. From 644b3a93d90910fbd194c464646a1260efc17fbf Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 9 Oct 2023 12:08:19 -0500 Subject: [PATCH 18/82] add basic output for GSEA --- src/rust/src/lib.rs | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 9709ef0..05dac81 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -1,5 +1,6 @@ +use std::vec; + use extendr_api::prelude::*; -use extendr_api::wrapper::dataframe::Dataframe; use ndarray::Array2; use rustc_hash::FxHashMap; use webgestalt_lib::methods::*; @@ -11,17 +12,18 @@ fn rust_hello_world() -> &'static str { } /// Run GSEA using rust library +/// @return List of the results of GSEA /// @export #[extendr] -fn gsea_rust() -> () { +fn gsea_rust() -> List { // webgestalt_lib::methods::gsea:: + list!( + fdr = vec![0.01, 0.05, 0.1], + leading_edge = vec![4, 6, 4], + gene_sets = vec!["GO1", "GO2", "GO3"] + ) } - -enum SafeTypes { - String(String), - Int(i32) -} /// Fill relation data frame for GSEA input /// /// Fill 1 for gene in gene set @@ -34,12 +36,13 @@ enum SafeTypes { /// ┌───────────┐ First column named 'gene' containing gene name /// │A0100110100│ 1 = in set /// Genes │B0100101000│ 0 = not in set -/// │C1011101001│ -/// └───────────┘ +/// │C1011101001│ Due to limitiations with extendr-api v 0.6.0, +/// └───────────┘ function returns a list, and the R package will +/// add the first 'gene' column /// ``` /// @param gmt A Data Frame with geneSet and gene columns from the GMT file /// @param genes A vector of genes -/// @param geneSets A vector of gene sets +/// @param gene_sets A vector of gene sets /// /// @return A Data Frame with the first column of gene and 1 or 0 for other columns of gene sets. /// @author John Elizarraras From 77e1123a50763478f7f72f16d5d2c35bd5f54d5a Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 9 Oct 2023 14:25:10 -0500 Subject: [PATCH 19/82] add GSEA config --- src/rust/Cargo.lock | 89 +++++++++++++++++---------------------------- src/rust/src/lib.rs | 11 +++++- 2 files changed, 43 insertions(+), 57 deletions(-) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 2ac35b7..1bf8257 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -12,6 +12,12 @@ dependencies = [ "webgestalt_lib", ] +[[package]] +name = "adjustp" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "151571bfc58e15241979bf8efb3b25506a437483101149cdcbc2ff56276da850" + [[package]] name = "approx" version = "0.5.1" @@ -39,16 +45,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "crossbeam-channel" -version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - [[package]] name = "crossbeam-deque" version = "0.8.3" @@ -84,9 +80,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.2.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" dependencies = [ "csv-core", "itoa", @@ -96,9 +92,9 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" dependencies = [ "memchr", ] @@ -129,7 +125,7 @@ checksum = "33833971650cade4bfa3097b979506bf2b4934b60392e110f95b94c2406cbb84" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -143,12 +139,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "hermit-abi" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" - [[package]] name = "itoa" version = "1.0.9" @@ -169,21 +159,21 @@ checksum = "a34aaa68a201f71eab5df5a67d1326add8aaf029434e939353bcab0534919ff1" [[package]] name = "libc" -version = "0.2.148" +version = "0.2.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" +checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" [[package]] name = "libm" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "matrixmultiply" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "090126dc04f95dc0d1c1c91f61bdd474b3930ca064c1edc8a849da2c6cbe1e77" +checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2" dependencies = [ "autocfg", "rawpointer", @@ -191,9 +181,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.6.3" +version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" [[package]] name = "memoffset" @@ -278,24 +268,14 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ "autocfg", "libm", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "once_cell" version = "1.18.0" @@ -316,9 +296,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.67" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" +checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" dependencies = [ "unicode-ident", ] @@ -380,9 +360,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" [[package]] name = "rayon" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" dependencies = [ "either", "rayon-core", @@ -390,14 +370,12 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" dependencies = [ - "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", - "num_cpus", ] [[package]] @@ -444,7 +422,7 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -486,9 +464,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.37" +version = "2.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8" +checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" dependencies = [ "proc-macro2", "quote", @@ -516,8 +494,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "webgestalt_lib" version = "0.1.0" -source = "git+https://github.com/bzhanglab/webgestalt_rust.git#b8272997030eab7c902dd355a58b737b7542ffc7" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git#30913178ccbdca5a7546a2f148f6a74abf0287c9" dependencies = [ + "adjustp", "csv", "rand", "rayon", @@ -528,9 +507,9 @@ dependencies = [ [[package]] name = "wide" -version = "0.7.11" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa469ffa65ef7e0ba0f164183697b89b854253fd31aeb92358b7b6155177d62f" +checksum = "ebecebefc38ff1860b4bc47550bbfa63af5746061cf0d29fcd7fa63171602598" dependencies = [ "bytemuck", "safe_arch", diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 05dac81..fa783b8 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -3,7 +3,7 @@ use std::vec; use extendr_api::prelude::*; use ndarray::Array2; use rustc_hash::FxHashMap; -use webgestalt_lib::methods::*; +use webgestalt_lib::methods::{gsea::GSEAConfig, *}; /// Return string `"Hello world!"` to R. /// @export #[extendr] @@ -15,8 +15,15 @@ fn rust_hello_world() -> &'static str { /// @return List of the results of GSEA /// @export #[extendr] -fn gsea_rust() -> List { +fn gsea_rust(min_overlap: Robj, max_overlap: Robj) -> List { // webgestalt_lib::methods::gsea:: + // + let config = GSEAConfig { + min_overlap: min_overlap.as_integer().unwrap(), + max_overlap: max_overlap.as_integer().unwrap(), + ..Default::default() + }; + let res = webgestalt_lib::methods::gsea::gsea(analyte_list, gmt, config); list!( fdr = vec![0.01, 0.05, 0.1], leading_edge = vec![4, 6, 4], From cf0ec6e5adaaaa8001b9782b35f81e652d5414be Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 10 Oct 2023 13:57:49 -0500 Subject: [PATCH 20/82] add GSEA vector creation --- src/rust/src/lib.rs | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index fa783b8..128faa0 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -23,11 +23,29 @@ fn gsea_rust(min_overlap: Robj, max_overlap: Robj) -> List { max_overlap: max_overlap.as_integer().unwrap(), ..Default::default() }; - let res = webgestalt_lib::methods::gsea::gsea(analyte_list, gmt, config); + let res = webgestalt_lib::methods::gsea::gsea(analyte_list, gmt, config); // TODO: Convert + // dataframe to GMT + let mut fdr: Vec = Vec::new(); + let mut p: Vec = Vec::new(); + let mut leading_edge: Vec = Vec::new(); + let mut gene_sets: Vec = Vec::new(); + let mut es: Vec = Vec::new(); + let mut nes: Vec = Vec::new(); + for row in res { + fdr.push(row.fdr); + p.push(row.p); + leading_edge.push(row.leading_edge); + gene_sets.push(row.set); + es.push(row.es); + nes.push(row.nes); + } list!( - fdr = vec![0.01, 0.05, 0.1], - leading_edge = vec![4, 6, 4], - gene_sets = vec!["GO1", "GO2", "GO3"] + fdr = fdr, + p = p, + es = es, + nes = nes, + leading_edge = leading_edge, + gene_sets = gene_sets, ) } From 3f88cec0d5c5df13ed282441d419d475e5deaefa Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Wed, 11 Oct 2023 09:53:16 -0500 Subject: [PATCH 21/82] convert list to dataframe using as.data.frame --- R/readGmt.R | 102 ++++++++++++++++++++++---------------------- src/rust/src/lib.rs | 1 - 2 files changed, 52 insertions(+), 51 deletions(-) diff --git a/R/readGmt.R b/R/readGmt.R index 3f2a00b..c6590d3 100644 --- a/R/readGmt.R +++ b/R/readGmt.R @@ -9,46 +9,46 @@ #' @importFrom tools file_ext #' @export #' -readGmt <- function(gmtFile, cache=NULL) { -#####Change a gmt file to a three column matrix (gene set name, gene set description and genes)####### - if (startsWith(gmtFile, "http://") || startsWith(gmtFile, "https://")) { - response <- cacheUrl(gmtFile, cache) - if (response$status_code == 200) { - data <- unlist(strsplit(content(response, "text"), "\n", fixed=TRUE)) - } else { - stop(webRequestError(response)) - } - } else { - if (file_ext(gmtFile) != "gmt") { - stop(gmtFormatError("empty")) - } - # remove BOM in some windows files - data <- gsub("\xEF\xBB\xBF", "", readLines(gmtFile, skipNul=TRUE), useBytes=TRUE) - } - data <- strsplit(data, "\t", useBytes=TRUE) - data <- lapply(data,.toList) - data <- do.call("rbind",data) +readGmt <- function(gmtFile, cache = NULL) { + ##### Change a gmt file to a three column matrix (gene set name, gene set description and genes)####### + if (startsWith(gmtFile, "http://") || startsWith(gmtFile, "https://")) { + response <- cacheUrl(gmtFile, cache) + if (response$status_code == 200) { + data <- unlist(strsplit(content(response, "text"), "\n", fixed = TRUE)) + } else { + stop(webRequestError(response)) + } + } else { + if (file_ext(gmtFile) != "gmt") { + stop(gmtFormatError("empty")) + } + # remove BOM in some windows files + data <- gsub("\xEF\xBB\xBF", "", readLines(gmtFile, skipNul = TRUE), useBytes = TRUE) + } + data <- strsplit(data, "\t", useBytes = TRUE) + data <- lapply(data, .toList) + data <- do.call("rbind", data) - if (is.null(data)) { - stop(gmtFormatError("incorrect")) - } else { - data <- as.data.frame(data, stringsAsFactors=FALSE) - colnames(data) <- c("geneSet", "description", "gene") - return(data) - } + if (is.null(data)) { + stop(gmtFormatError("incorrect")) + } else { + data <- as.data.frame(data, stringsAsFactors = FALSE) + colnames(data) <- c("geneSet", "description", "gene") + return(data) + } } readGMT <- readGmt .toList <- function(data) { - if (length(data)>2) { - data <- data[!is.na(data) & !is.null(data)] - # replace % in gene set names to _, because png treats % in filename specially - data1 <- cbind(rep(gsub('%', '_', data[1], fixed=TRUE), length(data)-2), rep(data[2], length(data)-2), data[c(-1,-2)]) - return(data1) - } else { - return(NULL) - } + if (length(data) > 2) { + data <- data[!is.na(data) & !is.null(data)] + # replace % in gene set names to _, because png treats % in filename specially + data1 <- cbind(rep(gsub("%", "_", data[1], fixed = TRUE), length(data) - 2), rep(data[2], length(data) - 2), data[c(-1, -2)]) + return(data1) + } else { + return(NULL) + } } #' Prepare Input Matrix for GSEA @@ -62,23 +62,25 @@ readGMT <- readGmt #' @export #' prepareInputMatrixGsea <- function(rank, gmt) { - genes <- rank$gene - gmt <- gmt %>% filter(.data$gene %in% genes) - geneSets <- (gmt %>% select(.data$geneSet, .data$description) %>% distinct())$geneSet - # 0 or 1 matrix indicating gene and gene set relationship - rel <- fillInputDataFrame(gmt, genes, geneSets) - # R implementation - # rel <- matrix(0, nrow=length(genes), ncol=length(geneSets), dimnames=list(genes, geneSets)) - # - # for (i in 1:nrow(gmt)) { - # rel[gmt[i, "gene"], gmt[i, "geneSet"]] <- 1 - # } - # rel <- as.data.frame(rel) - # rel$gene <- genes - return(inner_join(rank, rel, by="gene")) + genes <- rank$gene + gmt <- gmt %>% filter(.data$gene %in% genes) + geneSets <- (gmt %>% select(.data$geneSet, .data$description) %>% distinct())$geneSet + # 0 or 1 matrix indicating gene and gene set relationship + # C++ implementation + # rel <- fillInputDataFrame(gmt, genes, geneSets) + rel <- as.data.frame(fill_input_data_frame(gmt, genes, geneSets)) + # R implementation + # rel <- matrix(0, nrow=length(genes), ncol=length(geneSets), dimnames=list(genes, geneSets)) + # + # for (i in 1:nrow(gmt)) { + # rel[gmt[i, "gene"], gmt[i, "geneSet"]] <- 1 + # } + # rel <- as.data.frame(rel) + # rel$gene <- genes + return(inner_join(rank, rel, by = "gene")) } readGMT <- function(...) { - warning("Function readGMT is deprecated and changed to readGmt!\n") - return(readGmt(...)) + warning("Function readGMT is deprecated and changed to readGmt!\n") + return(readGmt(...)) } diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 128faa0..7572597 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -17,7 +17,6 @@ fn rust_hello_world() -> &'static str { #[extendr] fn gsea_rust(min_overlap: Robj, max_overlap: Robj) -> List { // webgestalt_lib::methods::gsea:: - // let config = GSEAConfig { min_overlap: min_overlap.as_integer().unwrap(), max_overlap: max_overlap.as_integer().unwrap(), From 7eddc1f6ed80a4ff02fb5775127d931f725561a6 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Wed, 11 Oct 2023 17:16:46 -0500 Subject: [PATCH 22/82] first version of Rust GSEA R package runs, but the names of rows/columns are not correct in the final dataframe --- .Rbuildignore | 2 + .cargo/config.toml | 2 + NAMESPACE | 6 +- R/RcppExports.R | 31 ---- R/WebGestaltR-package.R | 18 +- R/clara_cluster.R | 2 +- R/extendr-wrappers.R | 40 ++++- R/linkModification.R | 9 +- R/readGmt.R | 3 +- R/swGsea.R | 336 ++++++++++++++++++++------------------ WebGestaltR.Rproj | 17 ++ build_hash.py | 29 ++++ install | 4 +- man/WebGestaltR.Rd | 10 ++ man/fillInputDataFrame.Rd | 25 --- man/gseaPermutation.Rd | 25 --- man/linkModification.Rd | 8 +- man/rust_hello_world.Rd | 11 ++ src/RcppExports.cpp | 52 ------ src/fillInputMatrix.cpp | 43 ----- src/permutation.cpp | 65 -------- src/rust/Cargo.lock | 2 +- src/rust/src/lib.rs | 62 ++++++- 23 files changed, 372 insertions(+), 430 deletions(-) create mode 100644 .cargo/config.toml delete mode 100644 R/RcppExports.R create mode 100644 WebGestaltR.Rproj create mode 100644 build_hash.py delete mode 100644 man/fillInputDataFrame.Rd delete mode 100644 man/gseaPermutation.Rd create mode 100644 man/rust_hello_world.Rd delete mode 100644 src/RcppExports.cpp delete mode 100644 src/fillInputMatrix.cpp delete mode 100644 src/permutation.cpp diff --git a/.Rbuildignore b/.Rbuildignore index a03a6ba..a3f921f 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1 +1,3 @@ ^src/\.cargo$ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..c91c3f3 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[net] +git-fetch-with-cli = true diff --git a/NAMESPACE b/NAMESPACE index 45d3daa..6b0b175 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,11 @@ # Generated by roxygen2: do not edit by hand +export() export(GOSlimSummary) export(IDMapping) export(WebGestaltR) export(WebGestaltRBatch) export(WebGestaltR_batch) -export(kMedoid) -export(clara_cluster) export(affinityPropagation) export(formatCheck) export(goSlimSummary) @@ -23,6 +22,7 @@ export(loadGeneSet) export(prepareGseaInput) export(prepareInputMatrixGsea) export(readGmt) +export(rust_hello_world) export(swGsea) export(weightedSetCover) import(grDevices) @@ -31,7 +31,6 @@ import(methods) import(utils) importFrom(Rcpp,sourceCpp) importFrom(apcluster,apcluster) -importFrom(cluster, pam, clara) importFrom(doParallel,registerDoParallel) importFrom(doRNG,"%dorng%") importFrom(dplyr,"%>%") @@ -76,3 +75,4 @@ importFrom(tools,file_ext) importFrom(whisker,rowSplit) importFrom(whisker,whisker.render) useDynLib(WebGestaltR) +useDynLib(WebGestaltR, .registration = TRUE) diff --git a/R/RcppExports.R b/R/RcppExports.R deleted file mode 100644 index 0a91519..0000000 --- a/R/RcppExports.R +++ /dev/null @@ -1,31 +0,0 @@ -# Generated by using Rcpp::compileAttributes() -> do not edit by hand -# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -#' Fill relation data frame for GSEA input -#' -#' Fill 1 for gene in gene set -#' -#' @param gmt A Data Frame with geneSet and gene columns from the GMT file -#' @param genes A vector of genes -#' @param geneSets A vector of gene sets -#' -#' @return A Data Frame with the first column of gene and 1 or 0 for other columns of gene sets. -#' @author Yuxing Liao -#' @keywords internal -fillInputDataFrame <- function(gmt, genes, geneSets) { - .Call('_WebGestaltR_fillInputDataFrame', PACKAGE = 'WebGestaltR', gmt, genes, geneSets) -} - -#' Permutaion in GSEA algorithm -#' -#' @param inset_scores Scaled score matrix for genes in sets -#' @param outset_scores Normalized score matrix for genes not in sets -#' @param expression_value Vector of gene rank scores -#' -#' @return A vector of concatenated random minimal,maimum and best running sum scores for each set. -#' @author Yuxing Liao -#' @keywords internal -gseaPermutation <- function(inset_scores, outset_scores, expression_value) { - .Call('_WebGestaltR_gseaPermutation', PACKAGE = 'WebGestaltR', inset_scores, outset_scores, expression_value) -} - diff --git a/R/WebGestaltR-package.R b/R/WebGestaltR-package.R index 0875296..fa17344 100644 --- a/R/WebGestaltR-package.R +++ b/R/WebGestaltR-package.R @@ -1,3 +1,4 @@ + #' WebGestaltR: The R interface for enrichment analysis with WebGestalt. #' #' @docType package @@ -14,9 +15,16 @@ NULL .onAttach <- function(lib, pkg) { - packageStartupMessage("******************************************\n") - packageStartupMessage("* *\n") - packageStartupMessage("* Welcome to WebGestaltR ! *\n") - packageStartupMessage("* *\n") - packageStartupMessage("******************************************\n") + packageStartupMessage("****************************************** +") + packageStartupMessage("* * +") + packageStartupMessage("* Welcome to WebGestaltR-rust! * +") + packageStartupMessage("* 17:11:26.489349 * +") + packageStartupMessage("* * +") + packageStartupMessage("****************************************** +") } diff --git a/R/clara_cluster.R b/R/clara_cluster.R index bbdf5b7..69849f0 100644 --- a/R/clara_cluster.R +++ b/R/clara_cluster.R @@ -5,7 +5,7 @@ clara_cluster <- function(idsInSet, score){ #sort clusters to make exemplar the first member clusters <- vector(mode="list", length(kmRes$medoids)) - print(kmRes$clusinfo) + # print(kmRes$clusinfo) if(length(kmRes$medoids) == 0){ return(NULL) } diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 82aa07a..46f7b31 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -1,5 +1,11 @@ +# Generated by extendr: Do not edit by hand + # nolint start +# +# This file was created with the following call: +# .Call("wrap__make_WebGestaltR_wrappers", use_symbols = TRUE, package_name = "WebGestaltR") + #' @docType package #' @usage NULL #' @useDynLib WebGestaltR, .registration = TRUE @@ -7,6 +13,38 @@ NULL #' Return string `"Hello world!"` to R. #' @export -hello_world <- function() .Call(wrap__hello_world) +rust_hello_world <- function() .Call(wrap__rust_hello_world) + +#' Fill relation data frame for GSEA input +#' +#' Fill 1 for gene in gene set +#' +#' See https://github.com/extendr/extendr/issues/612 for how to export DataFrame +#' +#' ## Diagram +#' ```shell +#' Gene Sets +#' ┌───────────┐ First column named 'gene' containing gene name +#' │A0100110100│ 1 = in set +#' Genes │B0100101000│ 0 = not in set +#' │C1011101001│ Due to limitiations with extendr-api v 0.6.0, +#' └───────────┘ function returns a list, and the R package will +#' add the first 'gene' column +#' ``` +#' @param gmt A Data Frame with geneSet and gene columns from the GMT file +#' @param genes A vector of genes +#' @param gene_sets A vector of gene sets +#' +#' @return A Data Frame with the first column of gene and 1 or 0 for other columns of gene sets. +#' @author John Elizarraras +#' @keywords internal +#' @export +fill_input_data_frame <- function(gmt, genes, gene_sets) .Call(wrap__fill_input_data_frame, gmt, genes, gene_sets) + +#' Run GSEA using rust library +#' @return List of the results of GSEA +#' @export +gsea_rust <- function(min_overlap, max_overlap, sets, parts, analytes, ranks) .Call(wrap__gsea_rust, min_overlap, max_overlap, sets, parts, analytes, ranks) + # nolint end diff --git a/R/linkModification.R b/R/linkModification.R index 9342fe1..ea9ca5d 100644 --- a/R/linkModification.R +++ b/R/linkModification.R @@ -4,9 +4,9 @@ #' #' @keywords internal linkModification <- function(enrichMethod, enrichPathwayLink, geneList, interestingGeneMap, hostName = "https://www.webgestalt.org/") { - print("link modification") - print(enrichPathwayLink) - print(interestingGeneMap$standardId) + # print("link modification") + # print(enrichPathwayLink) + # print(interestingGeneMap$standardId) if (grepl("www.kegg.jp", enrichPathwayLink, fixed = TRUE) && interestingGeneMap$standardId == "rampc") { print("kegg metabolite link modified") link <- keggMetaboliteLinkModification(enrichPathwayLink, geneList, interestingGeneMap, hostName) @@ -14,8 +14,7 @@ linkModification <- function(enrichMethod, enrichPathwayLink, geneList, interest } else if (grepl("www.wikipathways.org", enrichPathwayLink, fixed = TRUE) && interestingGeneMap$standardId == "rampc") { link <- wikiMetaboliteLinkModification(enrichMethod, enrichPathwayLink, geneList, interestingGeneMap, hostName) return(link) - } - else if (grepl("www.kegg.jp", enrichPathwayLink, fixed = TRUE)) { + } else if (grepl("www.kegg.jp", enrichPathwayLink, fixed = TRUE)) { link <- keggLinkModification(enrichPathwayLink, geneList) return(link) } else if (grepl("www.wikipathways.org", enrichPathwayLink, fixed = TRUE)) { diff --git a/R/readGmt.R b/R/readGmt.R index c6590d3..0a01fa6 100644 --- a/R/readGmt.R +++ b/R/readGmt.R @@ -66,9 +66,10 @@ prepareInputMatrixGsea <- function(rank, gmt) { gmt <- gmt %>% filter(.data$gene %in% genes) geneSets <- (gmt %>% select(.data$geneSet, .data$description) %>% distinct())$geneSet # 0 or 1 matrix indicating gene and gene set relationship - # C++ implementation + # C++ implementation # rel <- fillInputDataFrame(gmt, genes, geneSets) rel <- as.data.frame(fill_input_data_frame(gmt, genes, geneSets)) + rel$gene <- genes # R implementation # rel <- matrix(0, nrow=length(genes), ncol=length(geneSets), dimnames=list(genes, geneSets)) # diff --git a/R/swGsea.R b/R/swGsea.R index 8f3a789..75fb40d 100644 --- a/R/swGsea.R +++ b/R/swGsea.R @@ -104,7 +104,7 @@ swGsea <- function(input_df, thresh_type = "percentile", thresh = 0.9, thresh_ac expt <- colnames(input_df)[2] enr_test <- colnames(input_df)[3:ncol(input_df)] colnames(input_df)[c(1, 2)] <- c("item", "expression_val") - input_df <- arrange(input_df, desc(.data$expression_val)) + # input_df <- arrange(input_df, desc(.data$expression_val)) # get and check size of set items; build in-set matrix of 1's for items in set and 0's for items not in set inset_mat <- matrix(0, nrow = length(input_df$item), ncol = length(enr_test)) @@ -204,185 +204,199 @@ swGsea <- function(input_df, thresh_type = "percentile", thresh = 0.9, thresh_ac # generate list containing names of items in each set and ranks of those items items_in_set <- list() + rust_parts <- list() for (it in 1:ncol(inset_mat)) { items_in_set[[colnames(inset_mat)[it]]] <- data.frame(which(inset_mat[, it] == 1), stringsAsFactors = F) - colnames(items_in_set[[colnames(inset_mat)[it]]]) <- "rank" + rust_parts[[it]] <- rownames(items_in_set[[colnames(inset_mat)[it]]]) + colnames(items_in_set[[colnames(inset_mat)[it]]]) <- "rank" } + rust_analytes <- input_df[, 1] + rust_ranks <- input_df[, 2] + rust_sets <- colnames(inset_mat) + print(rust_parts) + + rust_result <- gsea_rust(15, 500, rust_sets, rust_parts, rust_analytes, rust_ranks) + # print(rust_result) + output_df <- data.frame(fdr = rust_result$fdr, p_val = rust_result$p_val, ES = rust_result$ES, NES = rust_result$NES) + output_df$fdr[output_df$fdr > 1] <- 1 + rownames(output_df) <- rust_result$gene_sets + return(list(Enrichment_Results = output_df, Running_Sums = rust_result$running_Sum, Items_in_Set = items_in_set)) + # adjust set scores to range from a minimum of 0+pc to a maximum of 1 (s=(Score-minScore+pc)/(maxScore-minScore+pc)); calculate s^q * |r|^p score for each item in each set; get sum of these scores for each set - if (max_score == "max") { - max_scores <- vector(mode = "numeric", length = ncol(inset_mat)) - } else { - max_scores <- max_score - } - if (min_score == "min") { - min_scores <- vector(mode = "numeric", length = ncol(inset_mat)) - } else { - min_scores <- min_score - } - scaled_scores <- inset_mat - set_scores <- inset_mat * input_df[, colnames(inset_mat)] - set_tot <- vector(mode = "numeric", length = ncol(inset_mat)) - adj_expr_val <- (abs(input_df$expression_val))^p - for (d in 1:ncol(set_scores)) { - if (max_score == "max") { - max_scores[d] <- max(set_scores[, d]) - } - if (min_score == "min") { - min_scores[d] <- min(set_scores[set_scores[, d] > 0, d]) - } - set_scores[set_scores[, d] > max_scores[d], d] <- max_scores[d] - set_scores[set_scores[, d] < min_scores[d], d] <- min_scores[d] + # if (max_score == "max") { + # max_scores <- vector(mode = "numeric", length = ncol(inset_mat)) + # } else { + # max_scores <- max_score + # } + # if (min_score == "min") { + # min_scores <- vector(mode = "numeric", length = ncol(inset_mat)) + # } else { + # min_scores <- min_score + # } + # scaled_scores <- inset_mat + # set_scores <- inset_mat * input_df[, colnames(inset_mat)] + # set_tot <- vector(mode = "numeric", length = ncol(inset_mat)) + # adj_expr_val <- (abs(input_df$expression_val))^p + # for (d in 1:ncol(set_scores)) { + # if (max_score == "max") { + # max_scores[d] <- max(set_scores[, d]) + # } + # if (min_score == "min") { + # min_scores[d] <- min(set_scores[set_scores[, d] > 0, d]) + # } + # set_scores[set_scores[, d] > max_scores[d], d] <- max_scores[d] + # set_scores[set_scores[, d] < min_scores[d], d] <- min_scores[d] - # if the max set score equals the minimum set score, all scores for items in set will be 1; therefore, let user know that this analysis now reverts to standard GSEA - # if(max_scores[d]==min_scores[d]){ - # print(paste0("max score for set ", colnames(inset_mat)[d], " equals min score; results will be equivalent to standard GSEA")) - # } + # # if the max set score equals the minimum set score, all scores for items in set will be 1; therefore, let user know that this analysis now reverts to standard GSEA + # # if(max_scores[d]==min_scores[d]){ + # # print(paste0("max score for set ", colnames(inset_mat)[d], " equals min score; results will be equivalent to standard GSEA")) + # # } - # scales scores for sets - scaled_scores[, d] <- ((set_scores[, d] - min_scores[d] + pc) / (max_scores[d] - min_scores[d] + pc))^q - # multiple by inset_mat to set scores for items not in set to 0 - scaled_scores[, d] <- scaled_scores[, d] * inset_mat[, d] - set_scores[, d] <- scaled_scores[, d] * adj_expr_val - if (sum(set_scores[, d]) == 0) { - set_scores[, d] <- scaled_scores[, d] - } - set_tot[d] <- sum(set_scores[, d]) - } + # # scales scores for sets + # scaled_scores[, d] <- ((set_scores[, d] - min_scores[d] + pc) / (max_scores[d] - min_scores[d] + pc))^q + # # multiple by inset_mat to set scores for items not in set to 0 + # scaled_scores[, d] <- scaled_scores[, d] * inset_mat[, d] + # set_scores[, d] <- scaled_scores[, d] * adj_expr_val + # if (sum(set_scores[, d]) == 0) { + # set_scores[, d] <- scaled_scores[, d] + # } + # set_tot[d] <- sum(set_scores[, d]) + # } - # get set of items not in in_set - outset_mat <- 1 - inset_mat + # # get set of items not in in_set + # outset_mat <- 1 - inset_mat - # convert to vectors to matrices with same dimensions as inset_mat to use to calculate Running_Sum and for permutations below - # expr_mat <- matrix(rep(input_df$expression_val, times = ncol(inset_mat)), nrow = nrow(input_df), dimnames = list(rownames(inset_mat), colnames(inset_mat))) - set_tot <- t(matrix(rep(set_tot, times = nrow(inset_mat)), nrow = length(set_tot), dimnames = list(colnames(inset_mat), rownames(inset_mat)))) - outset_mat_sums <- t(matrix(rep(colSums(outset_mat), times = nrow(outset_mat)), nrow = ncol(outset_mat), dimnames = list(colnames(outset_mat), rownames(outset_mat)))) - outset_scores <- outset_mat / outset_mat_sums - scores_mat <- (set_scores / set_tot) - outset_scores + # # convert to vectors to matrices with same dimensions as inset_mat to use to calculate Running_Sum and for permutations below + # # expr_mat <- matrix(rep(input_df$expression_val, times = ncol(inset_mat)), nrow = nrow(input_df), dimnames = list(rownames(inset_mat), colnames(inset_mat))) + # set_tot <- t(matrix(rep(set_tot, times = nrow(inset_mat)), nrow = length(set_tot), dimnames = list(colnames(inset_mat), rownames(inset_mat)))) + # outset_mat_sums <- t(matrix(rep(colSums(outset_mat), times = nrow(outset_mat)), nrow = ncol(outset_mat), dimnames = list(colnames(outset_mat), rownames(outset_mat)))) + # outset_scores <- outset_mat / outset_mat_sums + # scores_mat <- (set_scores / set_tot) - outset_scores - # walk through ranked list and tally running total; also keep track of maximum and minimum values - Running_Sum <- matrix(0, ncol = ncol(scores_mat), nrow = nrow(scores_mat), dimnames = dimnames(scores_mat)) - running_max <- vector(mode = "numeric", length = ncol(scores_mat)) - running_min <- vector(mode = "numeric", length = ncol(scores_mat)) - for (e in 1:ncol(Running_Sum)) { - Running_Sum[, e] <- cumsum(scores_mat[, e]) - running_max[e] <- max(Running_Sum[, e]) - running_min[e] <- min(Running_Sum[, e]) - } + # # walk through ranked list and tally running total; also keep track of maximum and minimum values + # Running_Sum <- matrix(0, ncol = ncol(scores_mat), nrow = nrow(scores_mat), dimnames = dimnames(scores_mat)) + # running_max <- vector(mode = "numeric", length = ncol(scores_mat)) + # running_min <- vector(mode = "numeric", length = ncol(scores_mat)) + # for (e in 1:ncol(Running_Sum)) { + # Running_Sum[, e] <- cumsum(scores_mat[, e]) + # running_max[e] <- max(Running_Sum[, e]) + # running_min[e] <- min(Running_Sum[, e]) + # } - # permute df 1000x and use to determine p-value of max/min score from previous section - set.seed(rng_seed) - # FORK is an option for Unix machines to reduce memory footprint - if (fork == T) { - cl <- makeCluster(nThreads, type = "FORK") - } else { - cl <- makeCluster(nThreads) - } - registerDoParallel(cl) - # use dorng instead of dopar to properly pass rng seed to foreach loop - rand_stats <- foreach(i = 1:perms, .combine = "rbind") %dorng% { - gseaPermutation(scaled_scores, outset_scores, adj_expr_val) + # # permute df 1000x and use to determine p-value of max/min score from previous section + # set.seed(rng_seed) + # # FORK is an option for Unix machines to reduce memory footprint + # if (fork == T) { + # cl <- makeCluster(nThreads, type = "FORK") + # } else { + # cl <- makeCluster(nThreads) + # } + # registerDoParallel(cl) + # # use dorng instead of dopar to properly pass rng seed to foreach loop + # rand_stats <- foreach(i = 1:perms, .combine = "rbind") %dorng% { + # gseaPermutation(scaled_scores, outset_scores, adj_expr_val) - ## R implementation of permutations - # - # rand_df <- input_df[sample(nrow(input_df)), , drop = F] - # rand_df$expression_val <- input_df$expression_val - # rand_df <- rand_df[ , c("item","expression_val", colnames(inset_mat)), drop = F] - # rand_scaled_scores <- scaled_scores[rand_df$item, , drop=F] - # rand_outset_scores <- outset_scores[rand_df$item, , drop=F] - # - # rand_adj_scores <- rand_scaled_scores * (abs(expr_mat)^p) - # - # rand_set_tot <- colSums(rand_adj_scores) - # rand_set_tot <- t(matrix(rep(rand_set_tot, times = nrow(rand_adj_scores)), nrow = length(rand_set_tot), dimnames = list(colnames(rand_adj_scores), rownames(rand_adj_scores)))) - # rand_scores <- (rand_adj_scores / (rand_set_tot + 0.000001)) - rand_outset_scores - # - # rand_tot <- matrix(0, nrow = nrow(rand_scores), ncol = ncol(rand_scores)) - # rand_max <- numeric(ncol(inset_mat)) - # rand_min <- numeric(ncol(inset_mat)) - # rand_best <- numeric(ncol(inset_mat)) - # for(j in 1:ncol(rand_tot)){ - # rand_tot[ , j] <- cumsum(rand_scores[ , j]) - # rand_max[j] <- max(rand_tot[ , j]) - # rand_min[j] <- min(rand_tot[ , j]) - # if(rand_max[j] >= abs(rand_min[j])){ - # rand_best[j] <- rand_max[j] - # } else { rand_best[j] <- rand_min[j] } - # } - # c(rand_min, rand_max, rand_best) - } - stopCluster(cl) - cat(paste0(perms, " permutations of ", expt, " complete...\n")) + # ## R implementation of permutations + # # + # # rand_df <- input_df[sample(nrow(input_df)), , drop = F] + # # rand_df$expression_val <- input_df$expression_val + # # rand_df <- rand_df[ , c("item","expression_val", colnames(inset_mat)), drop = F] + # # rand_scaled_scores <- scaled_scores[rand_df$item, , drop=F] + # # rand_outset_scores <- outset_scores[rand_df$item, , drop=F] + # # + # # rand_adj_scores <- rand_scaled_scores * (abs(expr_mat)^p) + # # + # # rand_set_tot <- colSums(rand_adj_scores) + # # rand_set_tot <- t(matrix(rep(rand_set_tot, times = nrow(rand_adj_scores)), nrow = length(rand_set_tot), dimnames = list(colnames(rand_adj_scores), rownames(rand_adj_scores)))) + # # rand_scores <- (rand_adj_scores / (rand_set_tot + 0.000001)) - rand_outset_scores + # # + # # rand_tot <- matrix(0, nrow = nrow(rand_scores), ncol = ncol(rand_scores)) + # # rand_max <- numeric(ncol(inset_mat)) + # # rand_min <- numeric(ncol(inset_mat)) + # # rand_best <- numeric(ncol(inset_mat)) + # # for(j in 1:ncol(rand_tot)){ + # # rand_tot[ , j] <- cumsum(rand_scores[ , j]) + # # rand_max[j] <- max(rand_tot[ , j]) + # # rand_min[j] <- min(rand_tot[ , j]) + # # if(rand_max[j] >= abs(rand_min[j])){ + # # rand_best[j] <- rand_max[j] + # # } else { rand_best[j] <- rand_min[j] } + # # } + # # c(rand_min, rand_max, rand_best) + # } + # stopCluster(cl) + # cat(paste0(perms, " permutations of ", expt, " complete...\n")) - # split output from permutations into iteration by set dataframes for random running totals, random maxes, and random mins - rand_mins <- rand_stats[, 1:ncol(inset_mat)] - colnames(rand_mins) <- colnames(inset_mat) - rand_maxes <- rand_stats[, (ncol(inset_mat) + 1):(2 * ncol(inset_mat))] - colnames(rand_maxes) <- colnames(inset_mat) - rand_best <- rand_stats[, (2 * ncol(inset_mat) + 1):ncol(rand_stats)] - colnames(rand_best) <- colnames(inset_mat) + # # split output from permutations into iteration by set dataframes for random running totals, random maxes, and random mins + # rand_mins <- rand_stats[, 1:ncol(inset_mat)] + # colnames(rand_mins) <- colnames(inset_mat) + # rand_maxes <- rand_stats[, (ncol(inset_mat) + 1):(2 * ncol(inset_mat))] + # colnames(rand_maxes) <- colnames(inset_mat) + # rand_best <- rand_stats[, (2 * ncol(inset_mat) + 1):ncol(rand_stats)] + # colnames(rand_best) <- colnames(inset_mat) - # calculate max and min NES for each set - NES_max <- running_max / (colSums((rand_best >= 0) * rand_best) / (colSums(rand_best >= 0) + 0.000001) + 0.000001) - NES_min <- running_min / (colSums(abs(rand_best) * (rand_best <= 0)) / (colSums(rand_best <= 0) + 0.000001) + 0.000001) - NES_max <- NES_max * (colSums(rand_best >= 0) > 0) - NES_min <- NES_min * (colSums(rand_best <= 0) > 0) + # # calculate max and min NES for each set + # NES_max <- running_max / (colSums((rand_best >= 0) * rand_best) / (colSums(rand_best >= 0) + 0.000001) + 0.000001) + # NES_min <- running_min / (colSums(abs(rand_best) * (rand_best <= 0)) / (colSums(rand_best <= 0) + 0.000001) + 0.000001) + # NES_max <- NES_max * (colSums(rand_best >= 0) > 0) + # NES_min <- NES_min * (colSums(rand_best <= 0) > 0) - pval_max <- colSums(t(t(rand_best) >= running_max)) / (colSums(rand_best >= 0) + 0.000001) - pval_min <- colSums(t(t(rand_best) <= running_min)) / (colSums(rand_best <= 0) + 0.000001) + # pval_max <- colSums(t(t(rand_best) >= running_max)) / (colSums(rand_best >= 0) + 0.000001) + # pval_min <- colSums(t(t(rand_best) <= running_min)) / (colSums(rand_best <= 0) + 0.000001) - rand_mins_NES <- t((t(rand_mins) / (colSums(abs(rand_best) * (rand_best <= 0)) / (colSums(rand_best <= 0) + 0.000001) + 0.000001)) * (colSums(rand_best <= 0) > 0)) - rand_maxes_NES <- t((t(rand_maxes) / (colSums(rand_best * (rand_best >= 0)) / (colSums(rand_best >= 0) + 0.000001) + 0.000001)) * (colSums(rand_best >= 0) > 0)) - # rand_mins_NES <- rand_mins_NES * (colSums(rand_best>=0) > 0) - # rand_maxes_NES <- rand_maxes_NES * (colSums(rand_best<=0) > 0) + # rand_mins_NES <- t((t(rand_mins) / (colSums(abs(rand_best) * (rand_best <= 0)) / (colSums(rand_best <= 0) + 0.000001) + 0.000001)) * (colSums(rand_best <= 0) > 0)) + # rand_maxes_NES <- t((t(rand_maxes) / (colSums(rand_best * (rand_best >= 0)) / (colSums(rand_best >= 0) + 0.000001) + 0.000001)) * (colSums(rand_best >= 0) > 0)) + # # rand_mins_NES <- rand_mins_NES * (colSums(rand_best>=0) > 0) + # # rand_maxes_NES <- rand_maxes_NES * (colSums(rand_best<=0) > 0) - # calculate FDR for each NES - n_all_rand_min <- sum(rand_best <= 0) - if (n_all_rand_min == 0) { - n_all_rand_min <- 0.000001 - } - n_all_rand_max <- sum(rand_best >= 0) - if (n_all_rand_max == 0) { - n_all_rand_min <- 0.000001 - } + # # calculate FDR for each NES + # n_all_rand_min <- sum(rand_best <= 0) + # if (n_all_rand_min == 0) { + # n_all_rand_min <- 0.000001 + # } + # n_all_rand_max <- sum(rand_best >= 0) + # if (n_all_rand_max == 0) { + # n_all_rand_min <- 0.000001 + # } - # output_mat <- matrix(0, nrow = ncol(Running_Sum), ncol = 8, dimnames = list(colnames(Running_Sum), c("ES", "NES", "p_val", "fdr", "pos_ES", "neg_ES", "pos_NES", "neg_NES"))) - output_mat <- matrix(0, nrow = ncol(Running_Sum), ncol = 4, dimnames = list(colnames(Running_Sum), c("ES", "NES", "p_val", "fdr"))) - output_df <- data.frame(output_mat, stringsAsFactors = F) + # # output_mat <- matrix(0, nrow = ncol(Running_Sum), ncol = 8, dimnames = list(colnames(Running_Sum), c("ES", "NES", "p_val", "fdr", "pos_ES", "neg_ES", "pos_NES", "neg_NES"))) + # output_mat <- matrix(0, nrow = ncol(Running_Sum), ncol = 4, dimnames = list(colnames(Running_Sum), c("ES", "NES", "p_val", "fdr"))) + # output_df <- data.frame(output_mat, stringsAsFactors = F) - for (k in 1:length(NES_min)) { - # output_df$pos_ES[k] <- running_max[k] - # output_df$neg_ES[k] <- running_min[k] - # output_df$pos_NES[k] <- NES_max[k] - # output_df$neg_NES[k] <- NES_min[k] - if (abs(running_min[k]) < running_max[k]) { - output_df$ES[k] <- running_max[k] - output_df$NES[k] <- NES_max[k] - output_df$p_val[k] <- pval_max[k] - max_top <- sum(rowSums(rand_maxes_NES * (rand_best >= 0) >= NES_max[k])) / n_all_rand_max - max_bottom <- sum(NES_max[running_max >= abs(running_min)] >= NES_max[k]) / (sum(running_max >= abs(running_min))) - output_df$fdr[k] <- max_top / max_bottom - } else { - output_df$ES[k] <- running_min[k] - output_df$NES[k] <- NES_min[k] - output_df$p_val[k] <- pval_min[k] - min_top <- sum(rowSums(rand_mins_NES * (rand_best <= 0) <= NES_min[k])) / n_all_rand_min - min_bottom <- sum(NES_min[abs(running_min) > running_max] <= NES_min[k]) / sum(abs(running_min) > running_max) - output_df$fdr[k] <- min_top / min_bottom - } - } - if ((thresh_action == "include") & (length(skipped_sets) > 0)) { - new_row <- data.frame(matrix(0, nrow = 1, ncol = 4), stringsAsFactors = F) - colnames(new_row) <- colnames(output_df) - new_row$p_val <- 1 - new_row$fdr <- 1 - for (i in 1:length(skipped_sets)) { - rownames(new_row) <- skipped_sets[i] - output_df <- rbind(output_df, new_row) - } - } - output_df$fdr[output_df$fdr > 1] <- 1 - return(list(Enrichment_Results = output_df, Running_Sums = Running_Sum, Items_in_Set = items_in_set)) + # for (k in 1:length(NES_min)) { + # # output_df$pos_ES[k] <- running_max[k] + # # output_df$neg_ES[k] <- running_min[k] + # # output_df$pos_NES[k] <- NES_max[k] + # # output_df$neg_NES[k] <- NES_min[k] + # if (abs(running_min[k]) < running_max[k]) { + # output_df$ES[k] <- running_max[k] + # output_df$NES[k] <- NES_max[k] + # output_df$p_val[k] <- pval_max[k] + # max_top <- sum(rowSums(rand_maxes_NES * (rand_best >= 0) >= NES_max[k])) / n_all_rand_max + # max_bottom <- sum(NES_max[running_max >= abs(running_min)] >= NES_max[k]) / (sum(running_max >= abs(running_min))) + # output_df$fdr[k] <- max_top / max_bottom + # } else { + # output_df$ES[k] <- running_min[k] + # output_df$NES[k] <- NES_min[k] + # output_df$p_val[k] <- pval_min[k] + # min_top <- sum(rowSums(rand_mins_NES * (rand_best <= 0) <= NES_min[k])) / n_all_rand_min + # min_bottom <- sum(NES_min[abs(running_min) > running_max] <= NES_min[k]) / sum(abs(running_min) > running_max) + # output_df$fdr[k] <- min_top / min_bottom + # } + # } + # if ((thresh_action == "include") & (length(skipped_sets) > 0)) { + # new_row <- data.frame(matrix(0, nrow = 1, ncol = 4), stringsAsFactors = F) + # colnames(new_row) <- colnames(output_df) + # new_row$p_val <- 1 + # new_row$fdr <- 1 + # for (i in 1:length(skipped_sets)) { + # rownames(new_row) <- skipped_sets[i] + # output_df <- rbind(output_df, new_row) + # } + # } + # output_df$fdr[output_df$fdr > 1] <- 1 + # return(list(Enrichment_Results = output_df, Running_Sums = Running_Sum, Items_in_Set = items_in_set)) } diff --git a/WebGestaltR.Rproj b/WebGestaltR.Rproj new file mode 100644 index 0000000..21a4da0 --- /dev/null +++ b/WebGestaltR.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/build_hash.py b/build_hash.py new file mode 100644 index 0000000..dd08451 --- /dev/null +++ b/build_hash.py @@ -0,0 +1,29 @@ +import os +import datetime +base: str = """ +#' WebGestaltR: The R interface for enrichment analysis with WebGestalt. +#' +#' @docType package +#' @name WebGestaltR +#' @import methods +#' @import grDevices +#' @import graphics +#' @import utils +#' @importFrom Rcpp sourceCpp +#' @importFrom rlang .data +#' @useDynLib WebGestaltR +#' +NULL + + +.onAttach <- function(lib, pkg) { + packageStartupMessage(\"******************************************\n\") + packageStartupMessage(\"* *\n\") + packageStartupMessage(\"* Welcome to WebGestaltR-rust! *\n\") + packageStartupMessage(\"* {HASHHERE} *\n\") + packageStartupMessage(\"* *\n\") + packageStartupMessage(\"******************************************\n\") +} +""" +with open("R/WebGestaltR-package.R", "w") as w: + w.write(base.replace("{HASHHERE}", datetime.datetime.now().time().__str__())) diff --git a/install b/install index 111afa3..e768f51 100755 --- a/install +++ b/install @@ -1,9 +1,11 @@ #!/usr/bin/env bash +python3 build_hash.py R -e "library(utils) remove.packages('WebGestaltR') q()" echo "Old version removed" R -e "library(devtools) +rextendr::document() install('.') q()" -echo "New version installed" \ No newline at end of file +echo "New version installed" diff --git a/man/WebGestaltR.Rd b/man/WebGestaltR.Rd index 133c603..cd6e5f8 100644 --- a/man/WebGestaltR.Rd +++ b/man/WebGestaltR.Rd @@ -9,6 +9,7 @@ \title{WebGestaltR: The R interface for enrichment analysis with WebGestalt.} \usage{ WebGestaltR( + omic_type = "single", enrichMethod = "ORA", organism = "hsapiens", enrichDatabase = NULL, @@ -18,6 +19,7 @@ WebGestaltR( interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, + interestGeneNames = NULL, collapseMethod = "mean", referenceGeneFile = NULL, referenceGene = NULL, @@ -46,6 +48,10 @@ WebGestaltR( nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", + useWeightedSetCover = TRUE, + useAffinityPropagation = FALSE, + usekMedoid = FALSE, + kMedoid_k = 10, ... ) @@ -58,6 +64,8 @@ WebGestaltRBatch( ) } \arguments{ +\item{omic_type}{The type of omics analysis: \code{single} or \code{multi}} + \item{enrichMethod}{Enrichment methods: \code{ORA}, \code{GSEA} or \code{NTA}.} \item{organism}{Currently, WebGestaltR supports 12 organisms. Users can use the function @@ -104,6 +112,8 @@ gene list and the corresponding scores.} WebGestaltR for the selected organism can be found by the function \code{listIdType}. If the \code{organism} is \code{others}, users do not need to set this parameter.} +\item{interestGeneNames}{The names of the id lists for multiomics data.} + \item{collapseMethod}{The method to collapse duplicate IDs with scores. \code{mean}, \code{median}, \code{min} and \code{max} represent the mean, median, minimum and maximum of scores for the duplicate IDs.} diff --git a/man/fillInputDataFrame.Rd b/man/fillInputDataFrame.Rd deleted file mode 100644 index 289ab5f..0000000 --- a/man/fillInputDataFrame.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RcppExports.R -\name{fillInputDataFrame} -\alias{fillInputDataFrame} -\title{Fill relation data frame for GSEA input} -\usage{ -fillInputDataFrame(gmt, genes, geneSets) -} -\arguments{ -\item{gmt}{A Data Frame with geneSet and gene columns from the GMT file} - -\item{genes}{A vector of genes} - -\item{geneSets}{A vector of gene sets} -} -\value{ -A Data Frame with the first column of gene and 1 or 0 for other columns of gene sets. -} -\description{ -Fill 1 for gene in gene set -} -\author{ -Yuxing Liao -} -\keyword{internal} diff --git a/man/gseaPermutation.Rd b/man/gseaPermutation.Rd deleted file mode 100644 index c93d72d..0000000 --- a/man/gseaPermutation.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RcppExports.R -\name{gseaPermutation} -\alias{gseaPermutation} -\title{Permutaion in GSEA algorithm} -\usage{ -gseaPermutation(inset_scores, outset_scores, expression_value) -} -\arguments{ -\item{inset_scores}{Scaled score matrix for genes in sets} - -\item{outset_scores}{Normalized score matrix for genes not in sets} - -\item{expression_value}{Vector of gene rank scores} -} -\value{ -A vector of concatenated random minimal,maimum and best running sum scores for each set. -} -\description{ -Permutaion in GSEA algorithm -} -\author{ -Yuxing Liao -} -\keyword{internal} diff --git a/man/linkModification.Rd b/man/linkModification.Rd index 54fa922..83a6a2a 100644 --- a/man/linkModification.Rd +++ b/man/linkModification.Rd @@ -4,7 +4,13 @@ \alias{linkModification} \title{Modify the link to highlight the genes in the pathways} \usage{ -linkModification(enrichMethod, enrichPathwayLink, geneList, interestingGeneMap) +linkModification( + enrichMethod, + enrichPathwayLink, + geneList, + interestingGeneMap, + hostName = "https://www.webgestalt.org/" +) } \description{ Currently, we only have wikipathway and kegg pathways that need to modify the link diff --git a/man/rust_hello_world.Rd b/man/rust_hello_world.Rd new file mode 100644 index 0000000..fe92dd0 --- /dev/null +++ b/man/rust_hello_world.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extendr-wrappers.R +\name{rust_hello_world} +\alias{rust_hello_world} +\title{Return string `"Hello world!"` to R.} +\usage{ +rust_hello_world() +} +\description{ +Return string `"Hello world!"` to R. +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp deleted file mode 100644 index 01c0dc5..0000000 --- a/src/RcppExports.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Generated by using Rcpp::compileAttributes() -> do not edit by hand -// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -#include - -using namespace Rcpp; - -#ifdef RCPP_USE_GLOBAL_ROSTREAM -Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); -Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); -#endif - -// fillInputDataFrame -DataFrame fillInputDataFrame(DataFrame gmt, CharacterVector genes, CharacterVector geneSets); -RcppExport SEXP _WebGestaltR_fillInputDataFrame(SEXP gmtSEXP, SEXP genesSEXP, SEXP geneSetsSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< DataFrame >::type gmt(gmtSEXP); - Rcpp::traits::input_parameter< CharacterVector >::type genes(genesSEXP); - Rcpp::traits::input_parameter< CharacterVector >::type geneSets(geneSetsSEXP); - rcpp_result_gen = Rcpp::wrap(fillInputDataFrame(gmt, genes, geneSets)); - return rcpp_result_gen; -END_RCPP -} -// gseaPermutation -NumericVector gseaPermutation(NumericMatrix inset_scores, NumericMatrix outset_scores, NumericVector expression_value); -RcppExport SEXP _WebGestaltR_gseaPermutation(SEXP inset_scoresSEXP, SEXP outset_scoresSEXP, SEXP expression_valueSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< NumericMatrix >::type inset_scores(inset_scoresSEXP); - Rcpp::traits::input_parameter< NumericMatrix >::type outset_scores(outset_scoresSEXP); - Rcpp::traits::input_parameter< NumericVector >::type expression_value(expression_valueSEXP); - rcpp_result_gen = Rcpp::wrap(gseaPermutation(inset_scores, outset_scores, expression_value)); - return rcpp_result_gen; -END_RCPP -} - -RcppExport SEXP wrap__hello_world(); - -static const R_CallMethodDef CallEntries[] = { - {"_WebGestaltR_fillInputDataFrame", (DL_FUNC) &_WebGestaltR_fillInputDataFrame, 3}, - {"_WebGestaltR_gseaPermutation", (DL_FUNC) &_WebGestaltR_gseaPermutation, 3}, - {"wrap__hello_world", (DL_FUNC) &wrap__hello_world, 0}, - {NULL, NULL, 0} -}; - -RcppExport void R_init_WebGestaltR(DllInfo *dll) { - R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); - R_useDynamicSymbols(dll, FALSE); -} diff --git a/src/fillInputMatrix.cpp b/src/fillInputMatrix.cpp deleted file mode 100644 index ff8b108..0000000 --- a/src/fillInputMatrix.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include -#include -using namespace Rcpp; - -//' Fill relation data frame for GSEA input -//' -//' Fill 1 for gene in gene set -//' -//' @param gmt A Data Frame with geneSet and gene columns from the GMT file -//' @param genes A vector of genes -//' @param geneSets A vector of gene sets -//' -//' @return A Data Frame with the first column of gene and 1 or 0 for other columns of gene sets. -//' @author Yuxing Liao -//' @keywords internal -// [[Rcpp::export]] -DataFrame fillInputDataFrame(DataFrame gmt, CharacterVector genes, CharacterVector geneSets) { - IntegerMatrix rel(genes.size(), geneSets.size()); - std::unordered_map geneIndex, setIndex; - CharacterVector gmtSet = as(gmt["geneSet"]); - CharacterVector gmtGene = as(gmt["gene"]); - List result(geneSets.size() + 1); - - for (size_t i = 0, length = genes.size(); i < length; i++) { - geneIndex[genes[i]] = i; - } - for (size_t i = 0, length = geneSets.size(); i < length; i++) { - setIndex[geneSets[i]] = i; - } - for (size_t i = 0, length = gmtSet.size(); i < length; i++) { - rel(geneIndex[gmtGene[i]], setIndex[gmtSet[i]]) = 1; - } - result[0] = genes; - for (size_t i = 1, length = result.size(); i < length; i++) { - result[i] = rel(_, i - 1); - } - geneSets.push_front("gene"); //modify in place and back in R - // Set colnames after creating DF. Use list names cannot avoid converting colon to - // syntactic names (no 'optional' parameter like in as.data.frame for list) - DataFrame df = DataFrame::create(result, _["stringsAsFactors"]=false); - df.attr("names")= geneSets; - return df; -} diff --git a/src/permutation.cpp b/src/permutation.cpp deleted file mode 100644 index 56e6b5a..0000000 --- a/src/permutation.cpp +++ /dev/null @@ -1,65 +0,0 @@ -#include -#include -using namespace Rcpp; - - -NumericMatrix shuffleAndMultiplyColumn(NumericMatrix mat, NumericVector vec, IntegerVector rand_index) { - //multiply vector with matrix columns which are shuffled ad hoc by index - size_t nrow = mat.nrow(), ncol = mat.ncol(); - NumericMatrix m(nrow, ncol); - for (size_t j = 0; j < ncol; j++) { - for (size_t i = 0; i < nrow; i++) { - m(i, j) = mat(rand_index[i] - 1, j) * vec[i]; - } - // rare cases when all genes have zero expression values in permutation - // adjust to just 1/0 for standard GSEA, or weighted set membership inset matrix - if (is_true(all(m(_, j) == 0))) { - for (size_t i = 0; i < nrow; i++) { - m(i, j) = mat(rand_index[i] - 1, j); - } - } - } - return m; -} - -//' Permutaion in GSEA algorithm -//' -//' @param inset_scores Scaled score matrix for genes in sets -//' @param outset_scores Normalized score matrix for genes not in sets -//' @param expression_value Vector of gene rank scores -//' -//' @return A vector of concatenated random minimal,maimum and best running sum scores for each set. -//' @author Yuxing Liao -//' @keywords internal -// [[Rcpp::export]] -NumericVector gseaPermutation(NumericMatrix inset_scores, NumericMatrix outset_scores, NumericVector expression_value) { - size_t num_gene = inset_scores.nrow(); - size_t num_set = inset_scores.ncol(); - NumericMatrix rand_tot(num_gene, num_set); - NumericVector rand_res(3 * num_set); //rand_min, rand_max, rand_best concatenated - double rand_min = 0, rand_max = 0; - - IntegerVector rand_index = sample(num_gene, num_gene); - NumericMatrix rand_inset_scores = shuffleAndMultiplyColumn(inset_scores, expression_value, rand_index); - NumericVector rand_set_tot = colSums(rand_inset_scores); - - for (size_t i = 0; i < num_gene; i++) { - //shuffle outset scores here when in use without copying values beforehand - rand_inset_scores(i, _) = (rand_inset_scores(i, _) / rand_set_tot) - outset_scores(rand_index[i] - 1, _); - } - - for (size_t j = 0; j < num_set; j++) { - //explicit type conversion needed https://thecoatlessprofessor.com/programming/unofficial-rcpp-api-documentation/#carth - rand_tot(_, j) = cumsum(rand_inset_scores(_, j)).get(); - rand_max = max(rand_tot(_, j)); - rand_min = min(rand_tot(_, j)); - if (rand_max > std::abs(rand_min)) { - rand_res[j + 2 * num_set] = rand_max; - } else { - rand_res[j + 2 * num_set] = rand_min; - } - rand_res[j] = rand_min; - rand_res[j + num_set] = rand_max; - } - return rand_res; -} diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 1bf8257..69552c7 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -494,7 +494,7 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "webgestalt_lib" version = "0.1.0" -source = "git+https://github.com/bzhanglab/webgestalt_rust.git#30913178ccbdca5a7546a2f148f6a74abf0287c9" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git#a5c47d9da656add1a68cf25a4e82fea83d4ed05c" dependencies = [ "adjustp", "csv", diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 7572597..2475904 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -3,7 +3,13 @@ use std::vec; use extendr_api::prelude::*; use ndarray::Array2; use rustc_hash::FxHashMap; -use webgestalt_lib::methods::{gsea::GSEAConfig, *}; +use webgestalt_lib::{ + methods::{ + gsea::{GSEAConfig, RankListItem}, + *, + }, + readers::utils::Item, +}; /// Return string `"Hello world!"` to R. /// @export #[extendr] @@ -15,13 +21,44 @@ fn rust_hello_world() -> &'static str { /// @return List of the results of GSEA /// @export #[extendr] -fn gsea_rust(min_overlap: Robj, max_overlap: Robj) -> List { +fn gsea_rust( + min_overlap: Robj, + max_overlap: Robj, + sets: Robj, + parts: Robj, + analytes: Robj, + ranks: Robj, +) -> List { // webgestalt_lib::methods::gsea:: let config = GSEAConfig { - min_overlap: min_overlap.as_integer().unwrap(), - max_overlap: max_overlap.as_integer().unwrap(), + min_overlap: 15, + max_overlap: 500, ..Default::default() }; + let mut gmt: Vec = Vec::new(); + let set_vec = sets.as_str_vector().unwrap(); + let parts_vec: Vec> = parts + .as_list() + .unwrap() + .iter() + .map(|(_, x)| x.as_string_vector().unwrap()) + .collect(); + for (i, set) in set_vec.iter().enumerate() { + gmt.push(Item { + id: set.to_string(), + url: String::default(), + parts: parts_vec[i].clone(), + }) + } + let mut analyte_list: Vec = Vec::new(); + let analyte_vec: Vec<&str> = analytes.as_str_vector().unwrap(); + let ranks_vec: Vec = ranks.as_real_vector().unwrap(); + for (i, analyte) in analyte_vec.iter().enumerate() { + analyte_list.push(RankListItem { + analyte: analyte.to_string(), + rank: ranks_vec[i], + }) + } let res = webgestalt_lib::methods::gsea::gsea(analyte_list, gmt, config); // TODO: Convert // dataframe to GMT let mut fdr: Vec = Vec::new(); @@ -30,6 +67,7 @@ fn gsea_rust(min_overlap: Robj, max_overlap: Robj) -> List { let mut gene_sets: Vec = Vec::new(); let mut es: Vec = Vec::new(); let mut nes: Vec = Vec::new(); + let mut running_sum: Vec = Vec::new(); for row in res { fdr.push(row.fdr); p.push(row.p); @@ -37,14 +75,16 @@ fn gsea_rust(min_overlap: Robj, max_overlap: Robj) -> List { gene_sets.push(row.set); es.push(row.es); nes.push(row.nes); + running_sum.push(Robj::from(row.running_sum)); } list!( fdr = fdr, - p = p, - es = es, - nes = nes, + p_val = p, + ES = es, + NES = nes, leading_edge = leading_edge, gene_sets = gene_sets, + running_sum = running_sum, ) } @@ -76,7 +116,8 @@ fn gsea_rust(min_overlap: Robj, max_overlap: Robj) -> List { pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { let genes_vec = genes.as_string_vector().unwrap(); let gene_set_vec = gene_sets.as_string_vector().unwrap(); - let mut value_array = Array2::zeros((genes.len(), gmt.len())); + println!("{:?}", gene_set_vec); + let mut value_array = Array2::zeros((genes_vec.len(), gene_set_vec.len())); let mut gene_index: FxHashMap<&String, usize> = FxHashMap::default(); let mut set_index: FxHashMap<&String, usize> = FxHashMap::default(); let gmt_set: Vec = gmt.index("geneSet").unwrap().as_string_vector().unwrap(); @@ -87,12 +128,13 @@ pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { for (i, val) in gene_set_vec.iter().enumerate() { set_index.insert(val, i); } + println!("HERE"); for i in 0..gmt_set.len() { value_array[[gene_index[&gmt_gene[i]], set_index[&gmt_set[i]]]] = 1; } let mut gene_set_val: Vec> = Vec::new(); // gene_set_val.push(genes_vec.into_iter().map(|x| SafeTypes::String(x)).collect()); - for i in 0..gmt_set.len() { + for i in 0..value_array.len_of(ndarray::Axis(1)) { gene_set_val.push(value_array.column(i).to_vec()) } // gene_set_vec.insert(0, String::from("gene")); @@ -107,4 +149,6 @@ pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { extendr_module! { mod WebGestaltR; fn rust_hello_world; + fn fill_input_data_frame; + fn gsea_rust; } From 2e3e2da030ec79f50392c09761d70d8bfa6a1e13 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 12 Oct 2023 10:10:39 -0500 Subject: [PATCH 23/82] working version of GSEA --- R/WebGestaltR-package.R | 4 +- R/linkModification.R | 1 - R/swGsea.R | 200 ++++------------------------------------ src/rust/src/lib.rs | 2 - 4 files changed, 18 insertions(+), 189 deletions(-) diff --git a/R/WebGestaltR-package.R b/R/WebGestaltR-package.R index fa17344..105fbd6 100644 --- a/R/WebGestaltR-package.R +++ b/R/WebGestaltR-package.R @@ -19,9 +19,9 @@ NULL ") packageStartupMessage("* * ") - packageStartupMessage("* Welcome to WebGestaltR-rust! * + packageStartupMessage("* Welcome to WebGestaltR ! * ") - packageStartupMessage("* 17:11:26.489349 * + packageStartupMessage("* * ") packageStartupMessage("* * ") diff --git a/R/linkModification.R b/R/linkModification.R index ea9ca5d..f1ecc7d 100644 --- a/R/linkModification.R +++ b/R/linkModification.R @@ -8,7 +8,6 @@ linkModification <- function(enrichMethod, enrichPathwayLink, geneList, interest # print(enrichPathwayLink) # print(interestingGeneMap$standardId) if (grepl("www.kegg.jp", enrichPathwayLink, fixed = TRUE) && interestingGeneMap$standardId == "rampc") { - print("kegg metabolite link modified") link <- keggMetaboliteLinkModification(enrichPathwayLink, geneList, interestingGeneMap, hostName) return(link) } else if (grepl("www.wikipathways.org", enrichPathwayLink, fixed = TRUE) && interestingGeneMap$standardId == "rampc") { diff --git a/R/swGsea.R b/R/swGsea.R index 75fb40d..7f7bb2e 100644 --- a/R/swGsea.R +++ b/R/swGsea.R @@ -206,197 +206,29 @@ swGsea <- function(input_df, thresh_type = "percentile", thresh = 0.9, thresh_ac items_in_set <- list() rust_parts <- list() for (it in 1:ncol(inset_mat)) { - items_in_set[[colnames(inset_mat)[it]]] <- data.frame(which(inset_mat[, it] == 1), stringsAsFactors = F) - rust_parts[[it]] <- rownames(items_in_set[[colnames(inset_mat)[it]]]) - colnames(items_in_set[[colnames(inset_mat)[it]]]) <- "rank" + items_in_set[[colnames(inset_mat)[it]]] <- data.frame(which(inset_mat[, it] == 1), stringsAsFactors = F) + rust_parts[[it]] <- rownames(items_in_set[[colnames(inset_mat)[it]]]) + colnames(items_in_set[[colnames(inset_mat)[it]]]) <- "rank" } rust_analytes <- input_df[, 1] rust_ranks <- input_df[, 2] rust_sets <- colnames(inset_mat) - print(rust_parts) - rust_result <- gsea_rust(15, 500, rust_sets, rust_parts, rust_analytes, rust_ranks) - # print(rust_result) output_df <- data.frame(fdr = rust_result$fdr, p_val = rust_result$p_val, ES = rust_result$ES, NES = rust_result$NES) - output_df$fdr[output_df$fdr > 1] <- 1 rownames(output_df) <- rust_result$gene_sets - return(list(Enrichment_Results = output_df, Running_Sums = rust_result$running_Sum, Items_in_Set = items_in_set)) - - - # adjust set scores to range from a minimum of 0+pc to a maximum of 1 (s=(Score-minScore+pc)/(maxScore-minScore+pc)); calculate s^q * |r|^p score for each item in each set; get sum of these scores for each set - # if (max_score == "max") { - # max_scores <- vector(mode = "numeric", length = ncol(inset_mat)) - # } else { - # max_scores <- max_score - # } - # if (min_score == "min") { - # min_scores <- vector(mode = "numeric", length = ncol(inset_mat)) - # } else { - # min_scores <- min_score - # } - # scaled_scores <- inset_mat - # set_scores <- inset_mat * input_df[, colnames(inset_mat)] - # set_tot <- vector(mode = "numeric", length = ncol(inset_mat)) - # adj_expr_val <- (abs(input_df$expression_val))^p - # for (d in 1:ncol(set_scores)) { - # if (max_score == "max") { - # max_scores[d] <- max(set_scores[, d]) - # } - # if (min_score == "min") { - # min_scores[d] <- min(set_scores[set_scores[, d] > 0, d]) - # } - # set_scores[set_scores[, d] > max_scores[d], d] <- max_scores[d] - # set_scores[set_scores[, d] < min_scores[d], d] <- min_scores[d] - - # # if the max set score equals the minimum set score, all scores for items in set will be 1; therefore, let user know that this analysis now reverts to standard GSEA - # # if(max_scores[d]==min_scores[d]){ - # # print(paste0("max score for set ", colnames(inset_mat)[d], " equals min score; results will be equivalent to standard GSEA")) - # # } - - # # scales scores for sets - # scaled_scores[, d] <- ((set_scores[, d] - min_scores[d] + pc) / (max_scores[d] - min_scores[d] + pc))^q - # # multiple by inset_mat to set scores for items not in set to 0 - # scaled_scores[, d] <- scaled_scores[, d] * inset_mat[, d] - # set_scores[, d] <- scaled_scores[, d] * adj_expr_val - # if (sum(set_scores[, d]) == 0) { - # set_scores[, d] <- scaled_scores[, d] - # } - # set_tot[d] <- sum(set_scores[, d]) - # } - - # # get set of items not in in_set - # outset_mat <- 1 - inset_mat - - # # convert to vectors to matrices with same dimensions as inset_mat to use to calculate Running_Sum and for permutations below - # # expr_mat <- matrix(rep(input_df$expression_val, times = ncol(inset_mat)), nrow = nrow(input_df), dimnames = list(rownames(inset_mat), colnames(inset_mat))) - # set_tot <- t(matrix(rep(set_tot, times = nrow(inset_mat)), nrow = length(set_tot), dimnames = list(colnames(inset_mat), rownames(inset_mat)))) - # outset_mat_sums <- t(matrix(rep(colSums(outset_mat), times = nrow(outset_mat)), nrow = ncol(outset_mat), dimnames = list(colnames(outset_mat), rownames(outset_mat)))) - # outset_scores <- outset_mat / outset_mat_sums - # scores_mat <- (set_scores / set_tot) - outset_scores - - # # walk through ranked list and tally running total; also keep track of maximum and minimum values - # Running_Sum <- matrix(0, ncol = ncol(scores_mat), nrow = nrow(scores_mat), dimnames = dimnames(scores_mat)) - # running_max <- vector(mode = "numeric", length = ncol(scores_mat)) - # running_min <- vector(mode = "numeric", length = ncol(scores_mat)) - # for (e in 1:ncol(Running_Sum)) { - # Running_Sum[, e] <- cumsum(scores_mat[, e]) - # running_max[e] <- max(Running_Sum[, e]) - # running_min[e] <- min(Running_Sum[, e]) - # } - - # # permute df 1000x and use to determine p-value of max/min score from previous section - # set.seed(rng_seed) - # # FORK is an option for Unix machines to reduce memory footprint - # if (fork == T) { - # cl <- makeCluster(nThreads, type = "FORK") - # } else { - # cl <- makeCluster(nThreads) - # } - # registerDoParallel(cl) - # # use dorng instead of dopar to properly pass rng seed to foreach loop - # rand_stats <- foreach(i = 1:perms, .combine = "rbind") %dorng% { - # gseaPermutation(scaled_scores, outset_scores, adj_expr_val) - - # ## R implementation of permutations - # # - # # rand_df <- input_df[sample(nrow(input_df)), , drop = F] - # # rand_df$expression_val <- input_df$expression_val - # # rand_df <- rand_df[ , c("item","expression_val", colnames(inset_mat)), drop = F] - # # rand_scaled_scores <- scaled_scores[rand_df$item, , drop=F] - # # rand_outset_scores <- outset_scores[rand_df$item, , drop=F] - # # - # # rand_adj_scores <- rand_scaled_scores * (abs(expr_mat)^p) - # # - # # rand_set_tot <- colSums(rand_adj_scores) - # # rand_set_tot <- t(matrix(rep(rand_set_tot, times = nrow(rand_adj_scores)), nrow = length(rand_set_tot), dimnames = list(colnames(rand_adj_scores), rownames(rand_adj_scores)))) - # # rand_scores <- (rand_adj_scores / (rand_set_tot + 0.000001)) - rand_outset_scores - # # - # # rand_tot <- matrix(0, nrow = nrow(rand_scores), ncol = ncol(rand_scores)) - # # rand_max <- numeric(ncol(inset_mat)) - # # rand_min <- numeric(ncol(inset_mat)) - # # rand_best <- numeric(ncol(inset_mat)) - # # for(j in 1:ncol(rand_tot)){ - # # rand_tot[ , j] <- cumsum(rand_scores[ , j]) - # # rand_max[j] <- max(rand_tot[ , j]) - # # rand_min[j] <- min(rand_tot[ , j]) - # # if(rand_max[j] >= abs(rand_min[j])){ - # # rand_best[j] <- rand_max[j] - # # } else { rand_best[j] <- rand_min[j] } - # # } - # # c(rand_min, rand_max, rand_best) - # } - # stopCluster(cl) - # cat(paste0(perms, " permutations of ", expt, " complete...\n")) - - # # split output from permutations into iteration by set dataframes for random running totals, random maxes, and random mins - # rand_mins <- rand_stats[, 1:ncol(inset_mat)] - # colnames(rand_mins) <- colnames(inset_mat) - # rand_maxes <- rand_stats[, (ncol(inset_mat) + 1):(2 * ncol(inset_mat))] - # colnames(rand_maxes) <- colnames(inset_mat) - # rand_best <- rand_stats[, (2 * ncol(inset_mat) + 1):ncol(rand_stats)] - # colnames(rand_best) <- colnames(inset_mat) - - # # calculate max and min NES for each set - # NES_max <- running_max / (colSums((rand_best >= 0) * rand_best) / (colSums(rand_best >= 0) + 0.000001) + 0.000001) - # NES_min <- running_min / (colSums(abs(rand_best) * (rand_best <= 0)) / (colSums(rand_best <= 0) + 0.000001) + 0.000001) - # NES_max <- NES_max * (colSums(rand_best >= 0) > 0) - # NES_min <- NES_min * (colSums(rand_best <= 0) > 0) - - # pval_max <- colSums(t(t(rand_best) >= running_max)) / (colSums(rand_best >= 0) + 0.000001) - # pval_min <- colSums(t(t(rand_best) <= running_min)) / (colSums(rand_best <= 0) + 0.000001) - - # rand_mins_NES <- t((t(rand_mins) / (colSums(abs(rand_best) * (rand_best <= 0)) / (colSums(rand_best <= 0) + 0.000001) + 0.000001)) * (colSums(rand_best <= 0) > 0)) - # rand_maxes_NES <- t((t(rand_maxes) / (colSums(rand_best * (rand_best >= 0)) / (colSums(rand_best >= 0) + 0.000001) + 0.000001)) * (colSums(rand_best >= 0) > 0)) - # # rand_mins_NES <- rand_mins_NES * (colSums(rand_best>=0) > 0) - # # rand_maxes_NES <- rand_maxes_NES * (colSums(rand_best<=0) > 0) - - # # calculate FDR for each NES - # n_all_rand_min <- sum(rand_best <= 0) - # if (n_all_rand_min == 0) { - # n_all_rand_min <- 0.000001 - # } - # n_all_rand_max <- sum(rand_best >= 0) - # if (n_all_rand_max == 0) { - # n_all_rand_min <- 0.000001 - # } - - # # output_mat <- matrix(0, nrow = ncol(Running_Sum), ncol = 8, dimnames = list(colnames(Running_Sum), c("ES", "NES", "p_val", "fdr", "pos_ES", "neg_ES", "pos_NES", "neg_NES"))) - # output_mat <- matrix(0, nrow = ncol(Running_Sum), ncol = 4, dimnames = list(colnames(Running_Sum), c("ES", "NES", "p_val", "fdr"))) - # output_df <- data.frame(output_mat, stringsAsFactors = F) - - # for (k in 1:length(NES_min)) { - # # output_df$pos_ES[k] <- running_max[k] - # # output_df$neg_ES[k] <- running_min[k] - # # output_df$pos_NES[k] <- NES_max[k] - # # output_df$neg_NES[k] <- NES_min[k] - # if (abs(running_min[k]) < running_max[k]) { - # output_df$ES[k] <- running_max[k] - # output_df$NES[k] <- NES_max[k] - # output_df$p_val[k] <- pval_max[k] - # max_top <- sum(rowSums(rand_maxes_NES * (rand_best >= 0) >= NES_max[k])) / n_all_rand_max - # max_bottom <- sum(NES_max[running_max >= abs(running_min)] >= NES_max[k]) / (sum(running_max >= abs(running_min))) - # output_df$fdr[k] <- max_top / max_bottom - # } else { - # output_df$ES[k] <- running_min[k] - # output_df$NES[k] <- NES_min[k] - # output_df$p_val[k] <- pval_min[k] - # min_top <- sum(rowSums(rand_mins_NES * (rand_best <= 0) <= NES_min[k])) / n_all_rand_min - # min_bottom <- sum(NES_min[abs(running_min) > running_max] <= NES_min[k]) / sum(abs(running_min) > running_max) - # output_df$fdr[k] <- min_top / min_bottom - # } - # } - # if ((thresh_action == "include") & (length(skipped_sets) > 0)) { - # new_row <- data.frame(matrix(0, nrow = 1, ncol = 4), stringsAsFactors = F) - # colnames(new_row) <- colnames(output_df) - # new_row$p_val <- 1 - # new_row$fdr <- 1 - # for (i in 1:length(skipped_sets)) { - # rownames(new_row) <- skipped_sets[i] - # output_df <- rbind(output_df, new_row) - # } - # } - # output_df$fdr[output_df$fdr > 1] <- 1 - # return(list(Enrichment_Results = output_df, Running_Sums = Running_Sum, Items_in_Set = items_in_set)) + running_sum <- matrix(unlist(rust_result$running_sum), nrow = length(rownames(inset_mat)), ncol= length(colnames(inset_mat)), dimnames = list(rownames(inset_mat), colnames(inset_mat))) + if ((thresh_action == "include") & (length(skipped_sets) > 0)) { + new_row <- data.frame(matrix(0, nrow = 1, ncol = 4), stringsAsFactors = F) + colnames(new_row) <- colnames(output_df) + new_row$p_val <- 1 + new_row$fdr <- 1 + for (i in 1:length(skipped_sets)) { + rownames(new_row) <- skipped_sets[i] + output_df <- rbind(output_df, new_row) + } + } + output_df$fdr[output_df$fdr > 1] <- 1 + return(list(Enrichment_Results = output_df, Running_Sums = running_sum, Items_in_Set = items_in_set)) } diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 2475904..d08d29f 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -116,7 +116,6 @@ fn gsea_rust( pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { let genes_vec = genes.as_string_vector().unwrap(); let gene_set_vec = gene_sets.as_string_vector().unwrap(); - println!("{:?}", gene_set_vec); let mut value_array = Array2::zeros((genes_vec.len(), gene_set_vec.len())); let mut gene_index: FxHashMap<&String, usize> = FxHashMap::default(); let mut set_index: FxHashMap<&String, usize> = FxHashMap::default(); @@ -128,7 +127,6 @@ pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { for (i, val) in gene_set_vec.iter().enumerate() { set_index.insert(val, i); } - println!("HERE"); for i in 0..gmt_set.len() { value_array[[gene_index[&gmt_gene[i]], set_index[&gmt_set[i]]]] = 1; } From 1179275cbc09115dae4d137fe3e8e4fb32097620 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 12 Oct 2023 16:47:27 -0500 Subject: [PATCH 24/82] Add working ORA --- NAMESPACE | 5 +- R/WebGestaltR-package.R | 4 +- R/extendr-wrappers.R | 9 +- R/oraEnrichment.R | 166 ++++++++++++++++++++++------------- R/swGsea.R | 2 +- man/fill_input_data_frame.Rd | 36 ++++++++ man/gsea_rust.Rd | 11 +++ man/ora_rust.Rd | 8 ++ src/rust/Cargo.lock | 2 +- src/rust/src/lib.rs | 69 +++++++++++++-- 10 files changed, 236 insertions(+), 76 deletions(-) create mode 100644 man/fill_input_data_frame.Rd create mode 100644 man/gsea_rust.Rd create mode 100644 man/ora_rust.Rd diff --git a/NAMESPACE b/NAMESPACE index 6b0b175..109acae 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,14 +1,15 @@ # Generated by roxygen2: do not edit by hand -export() export(GOSlimSummary) export(IDMapping) export(WebGestaltR) export(WebGestaltRBatch) export(WebGestaltR_batch) export(affinityPropagation) +export(fill_input_data_frame) export(formatCheck) export(goSlimSummary) +export(gsea_rust) export(idMapping) export(idToSymbol) export(listArchiveURL) @@ -19,6 +20,7 @@ export(listIdType) export(listOrganism) export(listReferenceSet) export(loadGeneSet) +export(ora_rust) export(prepareGseaInput) export(prepareInputMatrixGsea) export(readGmt) @@ -39,6 +41,7 @@ importFrom(dplyr,bind_rows) importFrom(dplyr,desc) importFrom(dplyr,distinct) importFrom(dplyr,filter) +importFrom(dplyr,group_by) importFrom(dplyr,inner_join) importFrom(dplyr,left_join) importFrom(dplyr,mutate) diff --git a/R/WebGestaltR-package.R b/R/WebGestaltR-package.R index 105fbd6..a6e9d8a 100644 --- a/R/WebGestaltR-package.R +++ b/R/WebGestaltR-package.R @@ -19,9 +19,9 @@ NULL ") packageStartupMessage("* * ") - packageStartupMessage("* Welcome to WebGestaltR ! * + packageStartupMessage("* Welcome to WebGestaltR-rust! * ") - packageStartupMessage("* * + packageStartupMessage("* 16:39:34.080109 * ") packageStartupMessage("* * ") diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 46f7b31..cd262d0 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -37,14 +37,21 @@ rust_hello_world <- function() .Call(wrap__rust_hello_world) #' #' @return A Data Frame with the first column of gene and 1 or 0 for other columns of gene sets. #' @author John Elizarraras +#' @name fill_input_data_frame #' @keywords internal #' @export fill_input_data_frame <- function(gmt, genes, gene_sets) .Call(wrap__fill_input_data_frame, gmt, genes, gene_sets) #' Run GSEA using rust library #' @return List of the results of GSEA +#' @name gsea_rust #' @export -gsea_rust <- function(min_overlap, max_overlap, sets, parts, analytes, ranks) .Call(wrap__gsea_rust, min_overlap, max_overlap, sets, parts, analytes, ranks) +gsea_rust <- function(min_overlap, max_overlap, permutations, sets, parts, analytes, ranks) .Call(wrap__gsea_rust, min_overlap, max_overlap, permutations, sets, parts, analytes, ranks) + +#' Run ORA using Rust library +#' @name ora_rust +#' @export +ora_rust <- function(sets, parts, interest, reference) .Call(wrap__ora_rust, sets, parts, interest, reference) # nolint end diff --git a/R/oraEnrichment.R b/R/oraEnrichment.R index 44be6d4..08e8b00 100644 --- a/R/oraEnrichment.R +++ b/R/oraEnrichment.R @@ -1,73 +1,113 @@ -#' @importFrom dplyr filter select left_join mutate arrange %>% +#' @importFrom dplyr filter select left_join mutate arrange %>% group_by inner_join #' @importFrom stats p.adjust phyper -oraEnrichment <- function(interestGene, referenceGene, geneSet, minNum=10, maxNum=500, fdrMethod="BH", sigMethod="fdr", fdrThr=0.05, topThr=10) { - #before running this code, the main code has checked the overlap among interestGene, referenceGene and geneSet. - #And this three sets should have overlapping genes. +oraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10) { + # before running this code, the main code has checked the overlap among interestGene, referenceGene and geneSet. + # And this three sets should have overlapping genes. - # The calculation is based on the genes (input and reference) - # with annotations in GMT (i.e. effective genes shown in final HTML report) - # One common question is why input GMT affects results - # While GSEA does not have this - referenceGene <- intersect(referenceGene, geneSet$gene) + # The calculation is based on the genes (input and reference) + # with annotations in GMT (i.e. effective genes shown in final HTML report) + # One common question is why input GMT affects results + # While GSEA does not have this + referenceGene <- intersect(referenceGene, geneSet$gene) - geneSet <- filter(geneSet, .data$gene %in% referenceGene) + geneSet <- filter(geneSet, .data$gene %in% referenceGene) - geneSetNum <- tapply(geneSet$gene, geneSet$geneSet,length) - geneSetNum <- geneSetNum[geneSetNum>=minNum & geneSetNum<=maxNum] - if (length(geneSetNum) == 0) { - stop("ERROR: The number of annotated genes for all functional categories are not from ", minNum, " to ", maxNum, " for the ORA enrichment method.") - } + geneSetNum <- tapply(geneSet$gene, geneSet$geneSet, length) + geneSetNum <- geneSetNum[geneSetNum >= minNum & geneSetNum <= maxNum] + if (length(geneSetNum) == 0) { + stop("ERROR: The number of annotated genes for all functional categories are not from ", minNum, " to ", maxNum, " for the ORA enrichment method.") + } - interestGene <- intersect(interestGene, geneSet$gene) - interestGene <- intersect(interestGene, referenceGene) - if (length(interestGene) == 0) { - stop("ERROR: No genes in the interesting list can annotate to any functional category.") - } + interestGene <- intersect(interestGene, geneSet$gene) + interestGene <- intersect(interestGene, referenceGene) + if (length(interestGene) == 0) { + stop("ERROR: No genes in the interesting list can annotate to any functional category.") + } + refG <- data.frame(geneSet = names(geneSetNum), size = as.numeric(geneSetNum), stringsAsFactors = FALSE) + intG <- filter(geneSet, .data$gene %in% interestGene) + intGId <- tapply(intG$gene, intG$geneSet, paste, collapse = ";") + intGId <- data.frame(geneSet = names(intGId), overlapId = as.character(intGId), stringsAsFactors = FALSE) + intGNum <- tapply(intG$gene, intG$geneSet, length) # a vector of overlap with geneset as name + intGNum <- data.frame(geneSet = names(intGNum), overlap = as.numeric(intGNum), stringsAsFactors = FALSE) + genes <- tapply(geneSet$gene, geneSet$geneSet, rbind) + rust_result <- ora_rust(names(genes), genes, interestGene, referenceGene) + rust_result_df <- data.frame( + FDR = rust_result$fdr, pValue = rust_result$p, expect = rust_result$expect, + enrichmentRatio = rust_result$enrichment_ratio, geneSet = rust_result$gene_set + ) + # geneSet <- geneSet %>% + # filter(!is.na(.data$geneSet)) %>% + # filter(.data$geneSet %in% names(geneSetNum)) %>% + # select(.data$geneSet, link = .data$description) %>% + # distinct() %>% + # left_join(refG, by = "geneSet") %>% + # left_join(intGNum, by = "geneSet") + # rust_result <- filter(rust_result_df, .data$geneSet %in% as.data.frame(geneSet)$geneSet) + # print(rust_result) + # enrichedResult <- data.frame( + # FDR = rust_result$fdr, pValue = rust_result$p, expect = rust_result$expect, overlap = rust_result$overlap, + # enrichmentRato = rust_result$enrichment_ratio, geneSet = rust_result$gene_set + # ) %>% + # left_join(intGId, by = "geneSet") %>% # get overlapping gene IDs + # arrange(.data$FDR, .data$pValue, .data$enrichmentRatio) - ###############Enrichment analysis################### - ra <- length(interestGene) / length(referenceGene) # ratio - refG <- data.frame(geneSet=names(geneSetNum), size=as.numeric(geneSetNum), stringsAsFactors=FALSE) + enrichedResult <- geneSet %>% + filter(!is.na(.data$geneSet)) %>% + filter(.data$geneSet %in% names(geneSetNum)) %>% + select(.data$geneSet, link = .data$description) %>% + distinct() %>% + left_join(refG, by = "geneSet") %>% + left_join(intGNum, by = "geneSet") %>% # this may just be inner_join. O is NA should not be meaningful anyway + inner_join( + rust_result_df, + by = "geneSet", + ) %>% + left_join(intGId, by = "geneSet") %>% # get overlapping gene IDs + arrange(.data$FDR, .data$pValue, .data$enrichmentRatio) + ############### Enrichment analysis################### + # ra <- length(interestGene) / length(referenceGene) # ratio + # refG <- data.frame(geneSet=names(geneSetNum), size=as.numeric(geneSetNum), stringsAsFactors=FALSE) + # intGNum <- tapply(intG$gene, intG$geneSet, length) # a vector of overlap with geneset as name + # intGNum <- data.frame(geneSet=names(intGNum), overlap=as.numeric(intGNum), stringsAsFactors=FALSE) - intG <- filter(geneSet, .data$gene %in% interestGene) - intGNum <- tapply(intG$gene, intG$geneSet, length) # a vector of overlap with geneset as name - intGNum <- data.frame(geneSet=names(intGNum), overlap=as.numeric(intGNum), stringsAsFactors=FALSE) + # intGId <- tapply(intG$gene, intG$geneSet, paste, collapse=";") + # intGId <- data.frame(geneSet=names(intGId), overlapId=as.character(intGId), stringsAsFactors=FALSE) - intGId <- tapply(intG$gene, intG$geneSet, paste, collapse=";") - intGId <- data.frame(geneSet=names(intGId), overlapId=as.character(intGId), stringsAsFactors=FALSE) + # # .data from rlang needed for package to pass R CMD check + # # https://cran.r-project.org/web/packages/dplyr/vignettes/programming.html + # enrichedResult <- geneSet %>% filter(!is.na(.data$geneSet)) %>% + # filter(.data$geneSet %in% names(geneSetNum)) %>% + # select(.data$geneSet, link=.data$description) %>% distinct() %>% + # left_join(refG, by="geneSet") %>% + # left_join(intGNum, by="geneSet") %>% # this may just be inner_join. O is NA should not be meaningful anyway + # mutate(overlap=ifelse(is.na(.data$overlap), 0, .data$overlap), expect=.data$size * ra, enrichmentRatio=.data$overlap /.data$expect, + # pValue=1-phyper(.data$overlap - 1, length(interestGene), length(referenceGene) - length(interestGene), .data$size, lower.tail=TRUE, log.p=FALSE), + # FDR=p.adjust(.data$pValue, method=fdrMethod) + # ) %>% + # left_join(intGId, by="geneSet") %>% # get overlapping gene IDs + # arrange(.data$FDR, .data$pValue, .data$enrichmentRatio) - # .data from rlang needed for package to pass R CMD check - # https://cran.r-project.org/web/packages/dplyr/vignettes/programming.html - enrichedResult <- geneSet %>% filter(!is.na(.data$geneSet)) %>% - filter(.data$geneSet %in% names(geneSetNum)) %>% - select(.data$geneSet, link=.data$description) %>% distinct() %>% - left_join(refG, by="geneSet") %>% - left_join(intGNum, by="geneSet") %>% # this may just be inner_join. O is NA should not be meaningful anyway - mutate(overlap=ifelse(is.na(.data$overlap), 0, .data$overlap), expect=.data$size * ra, enrichmentRatio=.data$overlap /.data$expect, - pValue=1-phyper(.data$overlap - 1, length(interestGene), length(referenceGene) - length(interestGene), .data$size, lower.tail=TRUE, log.p=FALSE), - FDR=p.adjust(.data$pValue, method=fdrMethod) - ) %>% - left_join(intGId, by="geneSet") %>% # get overlapping gene IDs - arrange(.data$FDR, .data$pValue, .data$enrichmentRatio) - - if (sigMethod == "fdr") { - enrichedResultSig <- filter(enrichedResult, .data$FDR% filter(.data$FDR >= fdrThr, .data$overlap != 0) %>% select(.data$geneSet, .data$enrichmentRatio, .data$FDR, .data$overlap) - return(list(enriched=enrichedResultSig, background=enrichedResultInsig)) - } - } else { - #for the top method, we only select the terms with at least one annotated interesting gene - enrichedResult <- enrichedResult %>% filter(.data$overlap != 0) - if (nrow(enrichedResult)>topThr) { - enrichedResultSig <- enrichedResult[1:topThr, ] - enrichedResultInsig <- enrichedResult[(topThr+1):nrow(enrichedResult), c("geneSet", "enrichmentRatio", "FDR", "overlap")] - }else{ - enrichedResultSig <- enrichedResult - enrichedResultInsig <- data.frame() - } - return(list(enriched=enrichedResultSig, background=enrichedResultInsig)) - } + if (sigMethod == "fdr") { + enrichedResultSig <- filter(enrichedResult, .data$FDR < fdrThr) + if (nrow(enrichedResultSig) == 0) { + warning("No significant gene set is identified based on FDR ", fdrThr, "!") + return(NULL) + } else { + enrichedResultInsig <- enrichedResult %>% + filter(.data$FDR >= fdrThr, .data$overlap != 0) %>% + select(.data$geneSet, .data$enrichmentRatio, .data$FDR, .data$overlap) + return(list(enriched = enrichedResultSig, background = enrichedResultInsig)) + } + } else { + # for the top method, we only select the terms with at least one annotated interesting gene + enrichedResult <- enrichedResult %>% filter(.data$overlap != 0) + if (nrow(enrichedResult) > topThr) { + enrichedResultSig <- enrichedResult[1:topThr, ] + enrichedResultInsig <- enrichedResult[(topThr + 1):nrow(enrichedResult), c("geneSet", "enrichmentRatio", "FDR", "overlap")] + } else { + enrichedResultSig <- enrichedResult + enrichedResultInsig <- data.frame() + } + return(list(enriched = enrichedResultSig, background = enrichedResultInsig)) + } } diff --git a/R/swGsea.R b/R/swGsea.R index 7f7bb2e..471c21b 100644 --- a/R/swGsea.R +++ b/R/swGsea.R @@ -213,7 +213,7 @@ swGsea <- function(input_df, thresh_type = "percentile", thresh = 0.9, thresh_ac rust_analytes <- input_df[, 1] rust_ranks <- input_df[, 2] rust_sets <- colnames(inset_mat) - rust_result <- gsea_rust(15, 500, rust_sets, rust_parts, rust_analytes, rust_ranks) + rust_result <- gsea_rust(min_set_size, max_set_size, rust_sets, rust_parts, rust_analytes, rust_ranks) output_df <- data.frame(fdr = rust_result$fdr, p_val = rust_result$p_val, ES = rust_result$ES, NES = rust_result$NES) rownames(output_df) <- rust_result$gene_sets running_sum <- matrix(unlist(rust_result$running_sum), nrow = length(rownames(inset_mat)), ncol= length(colnames(inset_mat)), dimnames = list(rownames(inset_mat), colnames(inset_mat))) diff --git a/man/fill_input_data_frame.Rd b/man/fill_input_data_frame.Rd new file mode 100644 index 0000000..6c7301f --- /dev/null +++ b/man/fill_input_data_frame.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extendr-wrappers.R +\name{fill_input_data_frame} +\alias{fill_input_data_frame} +\title{Fill relation data frame for GSEA input} +\arguments{ +\item{gmt}{A Data Frame with geneSet and gene columns from the GMT file} + +\item{genes}{A vector of genes} + +\item{gene_sets}{A vector of gene sets} +} +\value{ +A Data Frame with the first column of gene and 1 or 0 for other columns of gene sets. +} +\description{ +Fill 1 for gene in gene set +} +\details{ +See https://github.com/extendr/extendr/issues/612 for how to export DataFrame + +## Diagram +```shell + Gene Sets + ┌───────────┐ First column named 'gene' containing gene name + │A0100110100│ 1 = in set +Genes │B0100101000│ 0 = not in set + │C1011101001│ Due to limitiations with extendr-api v 0.6.0, + └───────────┘ function returns a list, and the R package will + add the first 'gene' column +``` +} +\author{ +John Elizarraras +} +\keyword{internal} diff --git a/man/gsea_rust.Rd b/man/gsea_rust.Rd new file mode 100644 index 0000000..d23f581 --- /dev/null +++ b/man/gsea_rust.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extendr-wrappers.R +\name{gsea_rust} +\alias{gsea_rust} +\title{Run GSEA using rust library} +\value{ +List of the results of GSEA +} +\description{ +Run GSEA using rust library +} diff --git a/man/ora_rust.Rd b/man/ora_rust.Rd new file mode 100644 index 0000000..b4b6586 --- /dev/null +++ b/man/ora_rust.Rd @@ -0,0 +1,8 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extendr-wrappers.R +\name{ora_rust} +\alias{ora_rust} +\title{Run ORA using Rust library} +\description{ +Run ORA using Rust library +} diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 69552c7..f877042 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -494,7 +494,7 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "webgestalt_lib" version = "0.1.0" -source = "git+https://github.com/bzhanglab/webgestalt_rust.git#a5c47d9da656add1a68cf25a4e82fea83d4ed05c" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git#d173db1e6d96f213470e9149bb04c057039d707e" dependencies = [ "adjustp", "csv", diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index d08d29f..d75aaaf 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -2,12 +2,10 @@ use std::vec; use extendr_api::prelude::*; use ndarray::Array2; -use rustc_hash::FxHashMap; +use rustc_hash::{FxHashMap, FxHashSet}; use webgestalt_lib::{ - methods::{ - gsea::{GSEAConfig, RankListItem}, - *, - }, + methods::gsea::{GSEAConfig, RankListItem}, + methods::ora::{get_ora, ORAConfig, ORAResult}, readers::utils::Item, }; /// Return string `"Hello world!"` to R. @@ -17,13 +15,67 @@ fn rust_hello_world() -> &'static str { "Hello world!" } +/// Run ORA using Rust library +/// @name ora_rust +/// @export +#[extendr] +fn ora_rust(sets: Robj, parts: Robj, interest: Robj, reference: Robj) -> List { + let config: ORAConfig = ORAConfig { + ..Default::default() + }; + let mut gmt: Vec = Vec::new(); + let set_vec = sets.as_str_vector().unwrap(); + let parts_vec: Vec> = parts + .as_list() + .unwrap() + .iter() + .map(|(_, x)| x.as_string_vector().unwrap()) + .collect(); + for (i, set) in set_vec.iter().enumerate() { + gmt.push(Item { + id: set.to_string(), + url: String::default(), + parts: parts_vec[i].clone(), + }) + } + let interest_set: FxHashSet = + FxHashSet::from_iter(interest.as_string_vector().unwrap()); + let reference_set: FxHashSet = + FxHashSet::from_iter(reference.as_string_vector().unwrap()); + let res: Vec = get_ora(&interest_set, &reference_set, gmt, config); + let mut p: Vec = Vec::new(); + let mut fdr: Vec = Vec::new(); + let mut expect: Vec = Vec::new(); + let mut enrichment_ratio: Vec = Vec::new(); + let mut overlap: Vec = Vec::new(); + let mut gene_set: Vec = Vec::new(); + for row in res { + gene_set.push(row.set); + p.push(row.p); + fdr.push(row.fdr); + expect.push(row.expected); + overlap.push(row.overlap); + enrichment_ratio.push(row.enrichment_ratio); + } + list!( + p = p, + gene_set = gene_set, + fdr = fdr, + expect = expect, + overlap = overlap, + enrichment_ratio = enrichment_ratio + ) +} + /// Run GSEA using rust library /// @return List of the results of GSEA +/// @name gsea_rust /// @export #[extendr] fn gsea_rust( min_overlap: Robj, max_overlap: Robj, + permutations: Robj, sets: Robj, parts: Robj, analytes: Robj, @@ -31,8 +83,9 @@ fn gsea_rust( ) -> List { // webgestalt_lib::methods::gsea:: let config = GSEAConfig { - min_overlap: 15, - max_overlap: 500, + min_overlap: min_overlap.as_real().unwrap() as i32, + max_overlap: max_overlap.as_real().unwrap() as i32, + permutations: permutations.as_real().unwrap() as i32, ..Default::default() }; let mut gmt: Vec = Vec::new(); @@ -110,6 +163,7 @@ fn gsea_rust( /// /// @return A Data Frame with the first column of gene and 1 or 0 for other columns of gene sets. /// @author John Elizarraras +/// @name fill_input_data_frame /// @keywords internal /// @export #[extendr] @@ -149,4 +203,5 @@ extendr_module! { fn rust_hello_world; fn fill_input_data_frame; fn gsea_rust; + fn ora_rust; } From ec742ee8e600fcf30e75985472512130b5846c1e Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Fri, 13 Oct 2023 10:01:46 -0500 Subject: [PATCH 25/82] add better readme --- R/WebGestaltR-package.R | 2 +- README.md | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/R/WebGestaltR-package.R b/R/WebGestaltR-package.R index a6e9d8a..7394580 100644 --- a/R/WebGestaltR-package.R +++ b/R/WebGestaltR-package.R @@ -21,7 +21,7 @@ NULL ") packageStartupMessage("* Welcome to WebGestaltR-rust! * ") - packageStartupMessage("* 16:39:34.080109 * + packageStartupMessage("* 17:08:38.143165 * ") packageStartupMessage("* * ") diff --git a/README.md b/README.md index 7604500..0cb28b5 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,7 @@ -# WebGestalt +# WebGestaltR + +> [!IMPORTANT] +> The new version of WebGesaltR requires Rust, which must be installed on your device prior to installing or updating the package from CRAN. See the installation section for more information. WebGestalt R package is the R version of our well-known web application tool WebGestalt (www.webgestalt.org) that has on average 27,000 users from 140 countries and territories per year and has also been cited 371 in 2016. The advantage of this R package is that it can be easily integrated to other pipelines or simultaneously analyze multiple gene lists. @@ -6,4 +9,13 @@ WebGestaltR function can perform popular enrichment analyses: ORA (Over-Represen ## Installation -Since WebGestaltR v2.0.0, Rust is used for core computations in the R package. Therefore, to install WebGestaltR, please download and install Rust from [https://www.rust-lang.org/learn/get-started](https://www.rust-lang.org/learn/get-started). For Mac, Linux, or Unix users, Rust can be installed from the command line, and Windows users can download a GUI installer. +Since WebGestaltR v1.0.0, Rust is used for core computations in the R package. Therefore, to install WebGestaltR, please download and install Rust from [https://www.rust-lang.org/learn/get-started](https://www.rust-lang.org/learn/get-started). For Mac, Linux, or Unix users, Rust can be installed from the command line, and Windows users can download a GUI installer. + +After installing Rust, you can install WebGestaltR with the following command: + +```R +# install.packages("devtools") # run if needed +devtools::install_github("bzhanglab/WebGestaltR") +``` + +During installation, the Rust compiler will be called to build the computation library used by WebGestaltR. If you run into problems with installation of the new version, please [open a new issue](https://github.com/bzhanglab/WebGestaltR/issues/new/choose). From f5109805714857c4c983f81313032427a1cc0eb5 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Fri, 13 Oct 2023 14:43:32 -0500 Subject: [PATCH 26/82] attempt to fix running sum with no luck --- DESCRIPTION | 2 +- R/WebGestaltR-package.R | 2 +- R/swGsea.R | 8 +++++--- README.md | 7 +++++++ src/rust/Cargo.lock | 10 +++++----- src/rust/src/lib.rs | 2 +- 6 files changed, 20 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 274b573..3964d14 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,5 +22,5 @@ Imports: methods, dplyr, doRNG, readr, parallel (>= 3.3.2), NeedsCompilation: yes LinkingTo: Rcpp RoxygenNote: 7.2.3 -SystemRequirements: Cargo (Rust's package manager), rustc v1.66.0 +SystemRequirements: Cargo (Rust's package manager), rustc v1.66.0+ Config/rextendr/version: 0.3.1 diff --git a/R/WebGestaltR-package.R b/R/WebGestaltR-package.R index 7394580..ccacb33 100644 --- a/R/WebGestaltR-package.R +++ b/R/WebGestaltR-package.R @@ -21,7 +21,7 @@ NULL ") packageStartupMessage("* Welcome to WebGestaltR-rust! * ") - packageStartupMessage("* 17:08:38.143165 * + packageStartupMessage("* 14:19:24.134118 * ") packageStartupMessage("* * ") diff --git a/R/swGsea.R b/R/swGsea.R index 471c21b..af89bba 100644 --- a/R/swGsea.R +++ b/R/swGsea.R @@ -104,7 +104,7 @@ swGsea <- function(input_df, thresh_type = "percentile", thresh = 0.9, thresh_ac expt <- colnames(input_df)[2] enr_test <- colnames(input_df)[3:ncol(input_df)] colnames(input_df)[c(1, 2)] <- c("item", "expression_val") - # input_df <- arrange(input_df, desc(.data$expression_val)) + input_df <- arrange(input_df, desc(.data$expression_val)) # get and check size of set items; build in-set matrix of 1's for items in set and 0's for items not in set inset_mat <- matrix(0, nrow = length(input_df$item), ncol = length(enr_test)) @@ -213,10 +213,12 @@ swGsea <- function(input_df, thresh_type = "percentile", thresh = 0.9, thresh_ac rust_analytes <- input_df[, 1] rust_ranks <- input_df[, 2] rust_sets <- colnames(inset_mat) - rust_result <- gsea_rust(min_set_size, max_set_size, rust_sets, rust_parts, rust_analytes, rust_ranks) + rust_result <- gsea_rust(min_set_size, max_set_size, perms, rust_sets, rust_parts, rust_analytes, rust_ranks) output_df <- data.frame(fdr = rust_result$fdr, p_val = rust_result$p_val, ES = rust_result$ES, NES = rust_result$NES) rownames(output_df) <- rust_result$gene_sets - running_sum <- matrix(unlist(rust_result$running_sum), nrow = length(rownames(inset_mat)), ncol= length(colnames(inset_mat)), dimnames = list(rownames(inset_mat), colnames(inset_mat))) + + running_sum <- matrix(unlist(unlist(rust_result$running_sum)), nrow = length(rust_result$running_sum[[1]]), ncol = length(rust_result$running_sum), dimnames = list(rust_analytes,rust_sets)) + print(head(rust_result$running_sum)) if ((thresh_action == "include") & (length(skipped_sets) > 0)) { new_row <- data.frame(matrix(0, nrow = 1, ncol = 4), stringsAsFactors = F) colnames(new_row) <- colnames(output_df) diff --git a/README.md b/README.md index 0cb28b5..4734d31 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,13 @@ WebGestalt R package is the R version of our well-known web application tool Web WebGestaltR function can perform popular enrichment analyses: ORA (Over-Representation Analysis), GSEA (Gene Set Enrichment Analysis) and NTA (Network Topology Analysis). Based on the user-uploaded gene list or gene list with scores (for GSEA method), WebGestaltR function will first map the gene list to entrez gene IDs and then summarize the gene list based on the GO (Gene Ontology) Slim data. After performing the enrichment analysis, WebGestaltR function also returns an user-friendly HTML report containing GO Slim summary and enrichment analysis result. If the functional categories have the DAG (directed acyclic graph) structure, the structure of the enriched categories can also be visualized in the report. +## New Changes + +> [!INFO] +> Besides the change in installation, there should be no difference in how the R package performs for existing use-cases. If you experience any difference in results that are not due to the data-update, that is considered a bug. [Please report the changes you experience in a new issue](https://github.com/bzhanglab/WebGestaltR/issues/new/choose). + +WebGestaltR's core was re-written in Rust, which dramatically increased performance, with up to 15x the speed of previous versions. + ## Installation Since WebGestaltR v1.0.0, Rust is used for core computations in the R package. Therefore, to install WebGestaltR, please download and install Rust from [https://www.rust-lang.org/learn/get-started](https://www.rust-lang.org/learn/get-started). For Mac, Linux, or Unix users, Rust can be installed from the command line, and Windows users can download a GUI installer. diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index f877042..95e1a49 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -407,18 +407,18 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" -version = "1.0.188" +version = "1.0.189" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +checksum = "8e422a44e74ad4001bdc8eede9a4570ab52f71190e9c076d14369f38b9200537" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.188" +version = "1.0.189" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +checksum = "1e48d1f918009ce3145511378cf68d613e3b3d9137d67272562080d68a2b32d5" dependencies = [ "proc-macro2", "quote", @@ -494,7 +494,7 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "webgestalt_lib" version = "0.1.0" -source = "git+https://github.com/bzhanglab/webgestalt_rust.git#d173db1e6d96f213470e9149bb04c057039d707e" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git#a5006bfd8d26288c126d4117c2c0c513839370ac" dependencies = [ "adjustp", "csv", diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index d75aaaf..d2eaa96 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -112,7 +112,7 @@ fn gsea_rust( rank: ranks_vec[i], }) } - let res = webgestalt_lib::methods::gsea::gsea(analyte_list, gmt, config); // TODO: Convert + let res = webgestalt_lib::methods::gsea::gsea(analyte_list, gmt, config, None); // TODO: Convert // dataframe to GMT let mut fdr: Vec = Vec::new(); let mut p: Vec = Vec::new(); From 1406f922f60e5346445a09362cc5b41ce6b5b807 Mon Sep 17 00:00:00 2001 From: John Elizarraras <13990267+iblacksand@users.noreply.github.com> Date: Fri, 13 Oct 2023 14:44:10 -0500 Subject: [PATCH 27/82] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4734d31..d192cb0 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ WebGestaltR function can perform popular enrichment analyses: ORA (Over-Represen ## New Changes -> [!INFO] +> [!NOTE] > Besides the change in installation, there should be no difference in how the R package performs for existing use-cases. If you experience any difference in results that are not due to the data-update, that is considered a bug. [Please report the changes you experience in a new issue](https://github.com/bzhanglab/WebGestaltR/issues/new/choose). WebGestaltR's core was re-written in Rust, which dramatically increased performance, with up to 15x the speed of previous versions. From 2ba25401b738c8aceb25b30f6b5c121b64a796ec Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 16 Oct 2023 14:07:15 -0500 Subject: [PATCH 28/82] Fix running_sum and finalize first working version of R package --- R/WebGestaltR-package.R | 13 ++++++------- R/swGsea.R | 5 ++--- README.md | 18 +++++++++--------- src/rust/Cargo.toml | 6 +++--- src/rust/src/lib.rs | 9 +++------ 5 files changed, 23 insertions(+), 28 deletions(-) diff --git a/R/WebGestaltR-package.R b/R/WebGestaltR-package.R index ccacb33..d291d1b 100644 --- a/R/WebGestaltR-package.R +++ b/R/WebGestaltR-package.R @@ -1,4 +1,3 @@ - #' WebGestaltR: The R interface for enrichment analysis with WebGestalt. #' #' @docType package @@ -15,16 +14,16 @@ NULL .onAttach <- function(lib, pkg) { - packageStartupMessage("****************************************** + packageStartupMessage("****************************************** ") - packageStartupMessage("* * + packageStartupMessage("* * ") - packageStartupMessage("* Welcome to WebGestaltR-rust! * + packageStartupMessage("* Welcome to WebGestaltR-rust! * ") - packageStartupMessage("* 14:19:24.134118 * + packageStartupMessage("* * ") - packageStartupMessage("* * + packageStartupMessage("* * ") - packageStartupMessage("****************************************** + packageStartupMessage("****************************************** ") } diff --git a/R/swGsea.R b/R/swGsea.R index af89bba..33e2bcc 100644 --- a/R/swGsea.R +++ b/R/swGsea.R @@ -216,9 +216,8 @@ swGsea <- function(input_df, thresh_type = "percentile", thresh = 0.9, thresh_ac rust_result <- gsea_rust(min_set_size, max_set_size, perms, rust_sets, rust_parts, rust_analytes, rust_ranks) output_df <- data.frame(fdr = rust_result$fdr, p_val = rust_result$p_val, ES = rust_result$ES, NES = rust_result$NES) rownames(output_df) <- rust_result$gene_sets - - running_sum <- matrix(unlist(unlist(rust_result$running_sum)), nrow = length(rust_result$running_sum[[1]]), ncol = length(rust_result$running_sum), dimnames = list(rust_analytes,rust_sets)) - print(head(rust_result$running_sum)) + running_sum <- do.call('cbind', rust_result$running_sum) + dimnames(running_sum) <- list(rust_analytes, names(rust_result$running_sum)) if ((thresh_action == "include") & (length(skipped_sets) > 0)) { new_row <- data.frame(matrix(0, nrow = 1, ncol = 4), stringsAsFactors = F) colnames(new_row) <- colnames(output_df) diff --git a/README.md b/README.md index d192cb0..50bbeac 100644 --- a/README.md +++ b/README.md @@ -7,22 +7,22 @@ WebGestalt R package is the R version of our well-known web application tool Web WebGestaltR function can perform popular enrichment analyses: ORA (Over-Representation Analysis), GSEA (Gene Set Enrichment Analysis) and NTA (Network Topology Analysis). Based on the user-uploaded gene list or gene list with scores (for GSEA method), WebGestaltR function will first map the gene list to entrez gene IDs and then summarize the gene list based on the GO (Gene Ontology) Slim data. After performing the enrichment analysis, WebGestaltR function also returns an user-friendly HTML report containing GO Slim summary and enrichment analysis result. If the functional categories have the DAG (directed acyclic graph) structure, the structure of the enriched categories can also be visualized in the report. -## New Changes - -> [!NOTE] -> Besides the change in installation, there should be no difference in how the R package performs for existing use-cases. If you experience any difference in results that are not due to the data-update, that is considered a bug. [Please report the changes you experience in a new issue](https://github.com/bzhanglab/WebGestaltR/issues/new/choose). - -WebGestaltR's core was re-written in Rust, which dramatically increased performance, with up to 15x the speed of previous versions. - ## Installation Since WebGestaltR v1.0.0, Rust is used for core computations in the R package. Therefore, to install WebGestaltR, please download and install Rust from [https://www.rust-lang.org/learn/get-started](https://www.rust-lang.org/learn/get-started). For Mac, Linux, or Unix users, Rust can be installed from the command line, and Windows users can download a GUI installer. -After installing Rust, you can install WebGestaltR with the following command: +After installing Rust, you can install WebGestaltR by running the following command in an R session: ```R -# install.packages("devtools") # run if needed +# install.packages("devtools") # run if devtools not already installed devtools::install_github("bzhanglab/WebGestaltR") ``` During installation, the Rust compiler will be called to build the computation library used by WebGestaltR. If you run into problems with installation of the new version, please [open a new issue](https://github.com/bzhanglab/WebGestaltR/issues/new/choose). + +## Changes + +> [!NOTE] +> Besides the change in installation, there should be no difference in how the R package performs for existing use-cases. If you experience any difference in results that are not due to the data-update, that is considered a bug. [Please report the changes you experience in a new issue](https://github.com/bzhanglab/WebGestaltR/issues/new/choose). + +WebGestaltR's core was re-written in Rust, which dramatically increased performance, with up to 15x the speed of previous versions. The new version also supports metabolomics, with support for 15 different ID types. diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index 6e83d46..fe1cd0a 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -4,11 +4,11 @@ version = '0.1.0' edition = '2021' [lib] -crate-type = [ 'staticlib' ] +crate-type = ['staticlib'] name = 'WebGestaltR' [dependencies] -extendr-api = '0.6.0' +extendr-api = { version = '0.6.0' } ndarray = "0.15.6" rustc-hash = "1.1.0" -webgestalt_lib = {git = "https://github.com/bzhanglab/webgestalt_rust.git"} +webgestalt_lib = { git = "https://github.com/bzhanglab/webgestalt_rust.git" } diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index d2eaa96..f7ac8d7 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -112,8 +112,7 @@ fn gsea_rust( rank: ranks_vec[i], }) } - let res = webgestalt_lib::methods::gsea::gsea(analyte_list, gmt, config, None); // TODO: Convert - // dataframe to GMT + let res = webgestalt_lib::methods::gsea::gsea(analyte_list, gmt, config, None); // TODO: Convert dataframe to GMT let mut fdr: Vec = Vec::new(); let mut p: Vec = Vec::new(); let mut leading_edge: Vec = Vec::new(); @@ -136,8 +135,8 @@ fn gsea_rust( ES = es, NES = nes, leading_edge = leading_edge, - gene_sets = gene_sets, - running_sum = running_sum, + gene_sets = gene_sets.clone(), + running_sum = List::from_names_and_values(gene_sets, running_sum), ) } @@ -185,11 +184,9 @@ pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { value_array[[gene_index[&gmt_gene[i]], set_index[&gmt_set[i]]]] = 1; } let mut gene_set_val: Vec> = Vec::new(); - // gene_set_val.push(genes_vec.into_iter().map(|x| SafeTypes::String(x)).collect()); for i in 0..value_array.len_of(ndarray::Axis(1)) { gene_set_val.push(value_array.column(i).to_vec()) } - // gene_set_vec.insert(0, String::from("gene")); // Construct DataFrame in R. Create list for now. List::from_names_and_values(gene_set_vec, gene_set_val).unwrap() // data_frame!(x = 1) From fef4641a72e9d7739230eb65be094f0c53d8e408 Mon Sep 17 00:00:00 2001 From: John Elizarraras <13990267+iblacksand@users.noreply.github.com> Date: Mon, 16 Oct 2023 14:09:15 -0500 Subject: [PATCH 29/82] Fix repo in install instructions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 50bbeac..90691b4 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ After installing Rust, you can install WebGestaltR by running the following comm ```R # install.packages("devtools") # run if devtools not already installed -devtools::install_github("bzhanglab/WebGestaltR") +devtools::install_github("iblacksand/WebGestaltR") ``` During installation, the Rust compiler will be called to build the computation library used by WebGestaltR. If you run into problems with installation of the new version, please [open a new issue](https://github.com/bzhanglab/WebGestaltR/issues/new/choose). From 144a82ad59e47fc3b8f3a41fb072ed3caea0e903 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 16 Oct 2023 14:32:23 -0500 Subject: [PATCH 30/82] add requirements --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 90691b4..30f4247 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,14 @@ WebGestaltR function can perform popular enrichment analyses: ORA (Over-Represen ## Installation +--- +### Requirements + +- R (>= 4.2.0) +- Rust (>= 1.54.0) + +--- + Since WebGestaltR v1.0.0, Rust is used for core computations in the R package. Therefore, to install WebGestaltR, please download and install Rust from [https://www.rust-lang.org/learn/get-started](https://www.rust-lang.org/learn/get-started). For Mac, Linux, or Unix users, Rust can be installed from the command line, and Windows users can download a GUI installer. After installing Rust, you can install WebGestaltR by running the following command in an R session: From 79744108ee1b9170ff526abc85a018adb29ddba7 Mon Sep 17 00:00:00 2001 From: John Elizarraras <13990267+iblacksand@users.noreply.github.com> Date: Mon, 16 Oct 2023 14:32:48 -0500 Subject: [PATCH 31/82] Update README.md heading size --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 30f4247..d927249 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ WebGestaltR function can perform popular enrichment analyses: ORA (Over-Represen ## Installation --- -### Requirements +#### Requirements - R (>= 4.2.0) - Rust (>= 1.54.0) From b7ed1e854e678ff5dcde1d851fd55e97c5417e18 Mon Sep 17 00:00:00 2001 From: John Elizarraras <13990267+iblacksand@users.noreply.github.com> Date: Mon, 16 Oct 2023 14:34:23 -0500 Subject: [PATCH 32/82] Update R version --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d927249..3840968 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ WebGestaltR function can perform popular enrichment analyses: ORA (Over-Represen --- #### Requirements -- R (>= 4.2.0) +- R (>= 4.0.0) - Rust (>= 1.54.0) --- From a7f5d7519e4f67fb29f7659246ab0efc0c7b4d2f Mon Sep 17 00:00:00 2001 From: John Elizarraras <13990267+iblacksand@users.noreply.github.com> Date: Tue, 17 Oct 2023 11:22:23 -0500 Subject: [PATCH 33/82] Update README.md links --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3840968..8cb8ec3 100644 --- a/README.md +++ b/README.md @@ -26,11 +26,11 @@ After installing Rust, you can install WebGestaltR by running the following comm devtools::install_github("iblacksand/WebGestaltR") ``` -During installation, the Rust compiler will be called to build the computation library used by WebGestaltR. If you run into problems with installation of the new version, please [open a new issue](https://github.com/bzhanglab/WebGestaltR/issues/new/choose). +During installation, the Rust compiler will be called to build the computation library used by WebGestaltR. If you run into problems with installation of the new version, please [open a new issue](https://github.com/iblacksand/WebGestaltR/issues/new/choose). ## Changes > [!NOTE] -> Besides the change in installation, there should be no difference in how the R package performs for existing use-cases. If you experience any difference in results that are not due to the data-update, that is considered a bug. [Please report the changes you experience in a new issue](https://github.com/bzhanglab/WebGestaltR/issues/new/choose). +> Besides the change in installation, there should be no difference in how the R package performs for existing use-cases. If you experience any difference in results that are not due to the data-update, that is considered a bug. [Please report the changes you experience in a new issue](https://github.com/iblacksand/WebGestaltR/issues/new/choose). WebGestaltR's core was re-written in Rust, which dramatically increased performance, with up to 15x the speed of previous versions. The new version also supports metabolomics, with support for 15 different ID types. From ad004a521a13dd878de5e79d896b1a5a62bdb22f Mon Sep 17 00:00:00 2001 From: John Elizarraras <13990267+iblacksand@users.noreply.github.com> Date: Tue, 17 Oct 2023 11:25:24 -0500 Subject: [PATCH 34/82] Update issue templates --- .github/ISSUE_TEMPLATE/r-vs--rust-difference.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/r-vs--rust-difference.md diff --git a/.github/ISSUE_TEMPLATE/r-vs--rust-difference.md b/.github/ISSUE_TEMPLATE/r-vs--rust-difference.md new file mode 100644 index 0000000..bcbb982 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/r-vs--rust-difference.md @@ -0,0 +1,10 @@ +--- +name: R vs. Rust Difference +about: Results differ between R and Rust Version +title: '' +labels: bug, Version Difference +assignees: iblacksand + +--- + +Description of the result that you previously got in the original version, and the new result that you got. From 5585bf2eba311ed21307079c26f9d694fccdd4f1 Mon Sep 17 00:00:00 2001 From: John Elizarraras <13990267+iblacksand@users.noreply.github.com> Date: Tue, 17 Oct 2023 11:27:33 -0500 Subject: [PATCH 35/82] Update issue templates --- .github/ISSUE_TEMPLATE/bug-report.md | 10 ++++++++++ .github/ISSUE_TEMPLATE/question.md | 10 ++++++++++ 2 files changed, 20 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug-report.md create mode 100644 .github/ISSUE_TEMPLATE/question.md diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md new file mode 100644 index 0000000..fed0975 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -0,0 +1,10 @@ +--- +name: Bug Report +about: Bug/crash in R package +title: '' +labels: bug +assignees: iblacksand + +--- + +Description of bug and steps to reproduce. diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md new file mode 100644 index 0000000..74ae329 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/question.md @@ -0,0 +1,10 @@ +--- +name: Question +about: Question about the R package that is not a bug. +title: '' +labels: question +assignees: iblacksand + +--- + + From 105f0ef07aec4a9d72b121ce8ddc40529d03ac7b Mon Sep 17 00:00:00 2001 From: John Elizarraras <13990267+iblacksand@users.noreply.github.com> Date: Tue, 17 Oct 2023 11:30:17 -0500 Subject: [PATCH 36/82] Update issue templates --- .github/ISSUE_TEMPLATE/installation-issue.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/installation-issue.md diff --git a/.github/ISSUE_TEMPLATE/installation-issue.md b/.github/ISSUE_TEMPLATE/installation-issue.md new file mode 100644 index 0000000..7992e74 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/installation-issue.md @@ -0,0 +1,11 @@ +--- +name: Installation Issue +about: Issue with installing the R package +title: '' +labels: Installation +assignees: iblacksand + +--- + +Platform (macOS, Windows, Linux, etc): PLATFORM +Description of issue: From 3133af1dabc518def9945156513fbadb03aca196 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 17 Oct 2023 11:32:58 -0500 Subject: [PATCH 37/82] fix ORA results not showing size --- R/oraEnrichment.R | 45 ++------------------------------------------- build_hash.py | 1 + install | 2 +- src/rust/Cargo.lock | 2 +- 4 files changed, 5 insertions(+), 45 deletions(-) diff --git a/R/oraEnrichment.R b/R/oraEnrichment.R index 08e8b00..3bcac37 100644 --- a/R/oraEnrichment.R +++ b/R/oraEnrichment.R @@ -27,29 +27,12 @@ oraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10, max intG <- filter(geneSet, .data$gene %in% interestGene) intGId <- tapply(intG$gene, intG$geneSet, paste, collapse = ";") intGId <- data.frame(geneSet = names(intGId), overlapId = as.character(intGId), stringsAsFactors = FALSE) - intGNum <- tapply(intG$gene, intG$geneSet, length) # a vector of overlap with geneset as name - intGNum <- data.frame(geneSet = names(intGNum), overlap = as.numeric(intGNum), stringsAsFactors = FALSE) genes <- tapply(geneSet$gene, geneSet$geneSet, rbind) rust_result <- ora_rust(names(genes), genes, interestGene, referenceGene) rust_result_df <- data.frame( FDR = rust_result$fdr, pValue = rust_result$p, expect = rust_result$expect, - enrichmentRatio = rust_result$enrichment_ratio, geneSet = rust_result$gene_set + enrichmentRatio = rust_result$enrichment_ratio, geneSet = rust_result$gene_set, overlap = rust_result$overlap ) - # geneSet <- geneSet %>% - # filter(!is.na(.data$geneSet)) %>% - # filter(.data$geneSet %in% names(geneSetNum)) %>% - # select(.data$geneSet, link = .data$description) %>% - # distinct() %>% - # left_join(refG, by = "geneSet") %>% - # left_join(intGNum, by = "geneSet") - # rust_result <- filter(rust_result_df, .data$geneSet %in% as.data.frame(geneSet)$geneSet) - # print(rust_result) - # enrichedResult <- data.frame( - # FDR = rust_result$fdr, pValue = rust_result$p, expect = rust_result$expect, overlap = rust_result$overlap, - # enrichmentRato = rust_result$enrichment_ratio, geneSet = rust_result$gene_set - # ) %>% - # left_join(intGId, by = "geneSet") %>% # get overlapping gene IDs - # arrange(.data$FDR, .data$pValue, .data$enrichmentRatio) enrichedResult <- geneSet %>% filter(!is.na(.data$geneSet)) %>% @@ -57,36 +40,12 @@ oraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10, max select(.data$geneSet, link = .data$description) %>% distinct() %>% left_join(refG, by = "geneSet") %>% - left_join(intGNum, by = "geneSet") %>% # this may just be inner_join. O is NA should not be meaningful anyway - inner_join( + left_join( rust_result_df, by = "geneSet", ) %>% left_join(intGId, by = "geneSet") %>% # get overlapping gene IDs arrange(.data$FDR, .data$pValue, .data$enrichmentRatio) - ############### Enrichment analysis################### - # ra <- length(interestGene) / length(referenceGene) # ratio - # refG <- data.frame(geneSet=names(geneSetNum), size=as.numeric(geneSetNum), stringsAsFactors=FALSE) - # intGNum <- tapply(intG$gene, intG$geneSet, length) # a vector of overlap with geneset as name - # intGNum <- data.frame(geneSet=names(intGNum), overlap=as.numeric(intGNum), stringsAsFactors=FALSE) - - # intGId <- tapply(intG$gene, intG$geneSet, paste, collapse=";") - # intGId <- data.frame(geneSet=names(intGId), overlapId=as.character(intGId), stringsAsFactors=FALSE) - - # # .data from rlang needed for package to pass R CMD check - # # https://cran.r-project.org/web/packages/dplyr/vignettes/programming.html - # enrichedResult <- geneSet %>% filter(!is.na(.data$geneSet)) %>% - # filter(.data$geneSet %in% names(geneSetNum)) %>% - # select(.data$geneSet, link=.data$description) %>% distinct() %>% - # left_join(refG, by="geneSet") %>% - # left_join(intGNum, by="geneSet") %>% # this may just be inner_join. O is NA should not be meaningful anyway - # mutate(overlap=ifelse(is.na(.data$overlap), 0, .data$overlap), expect=.data$size * ra, enrichmentRatio=.data$overlap /.data$expect, - # pValue=1-phyper(.data$overlap - 1, length(interestGene), length(referenceGene) - length(interestGene), .data$size, lower.tail=TRUE, log.p=FALSE), - # FDR=p.adjust(.data$pValue, method=fdrMethod) - # ) %>% - # left_join(intGId, by="geneSet") %>% # get overlapping gene IDs - # arrange(.data$FDR, .data$pValue, .data$enrichmentRatio) - if (sigMethod == "fdr") { enrichedResultSig <- filter(enrichedResult, .data$FDR < fdrThr) if (nrow(enrichedResultSig) == 0) { diff --git a/build_hash.py b/build_hash.py index dd08451..ee8239d 100644 --- a/build_hash.py +++ b/build_hash.py @@ -1,5 +1,6 @@ import os import datetime + base: str = """ #' WebGestaltR: The R interface for enrichment analysis with WebGestalt. #' diff --git a/install b/install index e768f51..b7bb656 100755 --- a/install +++ b/install @@ -1,5 +1,5 @@ #!/usr/bin/env bash -python3 build_hash.py +# python3 build_hash.py R -e "library(utils) remove.packages('WebGestaltR') q()" diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 95e1a49..d3c4be7 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -494,7 +494,7 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "webgestalt_lib" version = "0.1.0" -source = "git+https://github.com/bzhanglab/webgestalt_rust.git#a5006bfd8d26288c126d4117c2c0c513839370ac" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git#e6149cd7b3936251e994f527ae6729c7b8dfb27a" dependencies = [ "adjustp", "csv", From 93a2f2a5c824cb976dc30bad92dbb12ed2b7f1a8 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 17 Oct 2023 11:45:01 -0500 Subject: [PATCH 38/82] add more description about Rust installation --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 8cb8ec3..eebf430 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,8 @@ WebGestaltR function can perform popular enrichment analyses: ORA (Over-Represen Since WebGestaltR v1.0.0, Rust is used for core computations in the R package. Therefore, to install WebGestaltR, please download and install Rust from [https://www.rust-lang.org/learn/get-started](https://www.rust-lang.org/learn/get-started). For Mac, Linux, or Unix users, Rust can be installed from the command line, and Windows users can download a GUI installer. +Make sure you restart your terminal after installing Rust to ensure the Rust compiler is available in your path. You can check that Rust is installed correctly by running `rustc --version` in your terminal. + After installing Rust, you can install WebGestaltR by running the following command in an R session: ```R From a97f5501d73fa6cf5841d8e584724f683ea467f9 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 17 Oct 2023 16:22:20 -0500 Subject: [PATCH 39/82] fix ORA fdrs --- R/oraEnrichment.R | 12 +++++++----- install | 4 ++++ src/rust/Cargo.lock | 13 +++---------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/R/oraEnrichment.R b/R/oraEnrichment.R index 3bcac37..63fadb7 100644 --- a/R/oraEnrichment.R +++ b/R/oraEnrichment.R @@ -27,6 +27,12 @@ oraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10, max intG <- filter(geneSet, .data$gene %in% interestGene) intGId <- tapply(intG$gene, intG$geneSet, paste, collapse = ";") intGId <- data.frame(geneSet = names(intGId), overlapId = as.character(intGId), stringsAsFactors = FALSE) + geneSetFilter <- geneSet %>% + filter(!is.na(.data$geneSet)) %>% + filter(.data$geneSet %in% names(geneSetNum)) %>% + select(.data$geneSet, link = .data$description) %>% + distinct() + geneSet <- geneSet[geneSet$geneSet %in% geneSetFilter$geneSet, ] genes <- tapply(geneSet$gene, geneSet$geneSet, rbind) rust_result <- ora_rust(names(genes), genes, interestGene, referenceGene) rust_result_df <- data.frame( @@ -34,11 +40,7 @@ oraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10, max enrichmentRatio = rust_result$enrichment_ratio, geneSet = rust_result$gene_set, overlap = rust_result$overlap ) - enrichedResult <- geneSet %>% - filter(!is.na(.data$geneSet)) %>% - filter(.data$geneSet %in% names(geneSetNum)) %>% - select(.data$geneSet, link = .data$description) %>% - distinct() %>% + enrichedResult <- geneSetFilter %>% left_join(refG, by = "geneSet") %>% left_join( rust_result_df, diff --git a/install b/install index b7bb656..709f7de 100755 --- a/install +++ b/install @@ -1,5 +1,9 @@ #!/usr/bin/env bash # python3 build_hash.py +cd src/rust +cargo update +cd .. +cd .. R -e "library(utils) remove.packages('WebGestaltR') q()" diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index d3c4be7..70647e4 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -12,12 +12,6 @@ dependencies = [ "webgestalt_lib", ] -[[package]] -name = "adjustp" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "151571bfc58e15241979bf8efb3b25506a437483101149cdcbc2ff56276da850" - [[package]] name = "approx" version = "0.5.1" @@ -494,9 +488,8 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "webgestalt_lib" version = "0.1.0" -source = "git+https://github.com/bzhanglab/webgestalt_rust.git#e6149cd7b3936251e994f527ae6729c7b8dfb27a" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git#18a01387157cfb4f1e41b0b86da3b36716b9ee8b" dependencies = [ - "adjustp", "csv", "rand", "rayon", @@ -507,9 +500,9 @@ dependencies = [ [[package]] name = "wide" -version = "0.7.12" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebecebefc38ff1860b4bc47550bbfa63af5746061cf0d29fcd7fa63171602598" +checksum = "c68938b57b33da363195412cfc5fc37c9ed49aa9cfe2156fde64b8d2c9498242" dependencies = [ "bytemuck", "safe_arch", From 94ffc7cf5813c1e96a388d72be312ffa3a316408 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Wed, 18 Oct 2023 10:23:54 -0500 Subject: [PATCH 40/82] fix kMedoid imports --- NAMESPACE | 1 + R/kMedoid.R | 104 ++++++++++++++++++++++++++----------------------- man/kMedoid.Rd | 18 +++++++++ 3 files changed, 75 insertions(+), 48 deletions(-) create mode 100644 man/kMedoid.Rd diff --git a/NAMESPACE b/NAMESPACE index 109acae..5c473b6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,6 +33,7 @@ import(methods) import(utils) importFrom(Rcpp,sourceCpp) importFrom(apcluster,apcluster) +importFrom(cluster,pam) importFrom(doParallel,registerDoParallel) importFrom(doRNG,"%dorng%") importFrom(dplyr,"%>%") diff --git a/R/kMedoid.R b/R/kMedoid.R index 9e9cad4..2a0de13 100644 --- a/R/kMedoid.R +++ b/R/kMedoid.R @@ -1,56 +1,64 @@ -kMedoid <- function(idsInSet, score, maxK = 10){ - # first find out the union of sets, sorted - all.genes <- sort(unique(unlist(idsInSet))) - overlap.mat <- sapply(idsInSet, function(x) {as.integer(all.genes %in% x)}) +#' @title kMedoid +#' @description kMedoid clustering +#' @param idsInSet a list of sets of ids +#' @param score a vector of scores for each set +#' @param maxK maximum number of clusters +#' @importFrom cluster pam +kMedoid <- function(idsInSet, score, maxK = 10) { + # first find out the union of sets, sorted + all.genes <- sort(unique(unlist(idsInSet))) + overlap.mat <- sapply(idsInSet, function(x) { + as.integer(all.genes %in% x) + }) - num <- length(idsInSet) - if (num <= maxK) { - maxK <- num - 1 - } - sim.mat <- matrix(1, num, num) - colnames(sim.mat) <- colnames(overlap.mat) + num <- length(idsInSet) + if (num <= maxK) { + maxK <- num - 1 + } + sim.mat <- matrix(1, num, num) + colnames(sim.mat) <- colnames(overlap.mat) - if (num == 1) { - return(list(sim.mat=sim.mat, ip.vec=c(1))) - } + if (num == 1) { + return(list(sim.mat = sim.mat, ip.vec = c(1))) + } - for (i in 1:(num-1)) { - for (j in (i+1):num) { - x <- sum(bitwOr(overlap.mat[, i], overlap.mat[, j])) - if (x == 0) { # if there is no overlap, set the similarity to -Inf - sim.mat[i, j] <- -Inf - sim.mat[j, i] <- -Inf - } else { - jaccardIndex <- sum(bitwAnd(overlap.mat[, i], overlap.mat[, j])) / x - sim.mat[i, j] <- jaccardIndex - sim.mat[j, i] <- jaccardIndex - } - } + for (i in 1:(num - 1)) { + for (j in (i + 1):num) { + x <- sum(bitwOr(overlap.mat[, i], overlap.mat[, j])) + if (x == 0) { # if there is no overlap, set the similarity to -Inf + sim.mat[i, j] <- -Inf + sim.mat[j, i] <- -Inf + } else { + jaccardIndex <- sum(bitwAnd(overlap.mat[, i], overlap.mat[, j])) / x + sim.mat[i, j] <- jaccardIndex + sim.mat[j, i] <- jaccardIndex + } } + } - if (max(sim.mat) == min(sim.mat)) { - # this will generate error, so randomy add some noise to off diagonal elements - mat.siz <- dim(sim.mat)[1] - rand.m <- matrix(rnorm(mat.siz*mat.siz,0,0.01),mat.siz) - # make it symmetric - rand.m[lower.tri(rand.m)] = t(rand.m)[lower.tri(rand.m)] - sim.mat <- sim.mat + rand.m - # make diagonal all 1 - diag(sim.mat) <- 1 - } + if (max(sim.mat) == min(sim.mat)) { + # this will generate error, so randomy add some noise to off diagonal elements + mat.siz <- dim(sim.mat)[1] + rand.m <- matrix(rnorm(mat.siz * mat.siz, 0, 0.01), mat.siz) + # make it symmetric + rand.m[lower.tri(rand.m)] <- t(rand.m)[lower.tri(rand.m)] + sim.mat <- sim.mat + rand.m + # make diagonal all 1 + diag(sim.mat) <- 1 + } - # compute the k-medoid clustering - kmRes <- pam(sim.mat, maxK, diss=TRUE, variant = "faster") # TODO: Make parameter for number of clusters. Currently set to 5. - - #sort clusters to make exemplar the first member - clusters <- vector(mode="list", length(kmRes$medoids)) - if(length(kmRes$medoids) == 0){ - return(NULL) - } - for (i in 1:length(clusters)) { - clusters[[i]] <- kmRes$clustering[[i]][order(kmRes$clustering[[i]] == i, decreasing=TRUE)] - } - # print(kmRes$medoids) - return(list(clusters=sapply(clusters, names), representatives=kmRes$medoids)) + # compute the k-medoid clustering + kmRes <- pam(sim.mat, maxK, diss = TRUE, variant = "faster") # TODO: Make parameter for number of clusters. Currently set to 5. + + # sort clusters to make exemplar the first member + clusters <- vector(mode = "list", length(kmRes$medoids)) + if (length(kmRes$medoids) == 0) { + return(NULL) + } + for (i in 1:length(clusters)) { + clusters[[i]] <- kmRes$clustering[[i]][order(kmRes$clustering[[i]] == i, decreasing = TRUE)] + } + # print(kmRes$medoids) + return(list(clusters = sapply(clusters, names), representatives = kmRes$medoids)) } diff --git a/man/kMedoid.Rd b/man/kMedoid.Rd new file mode 100644 index 0000000..c22aed0 --- /dev/null +++ b/man/kMedoid.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kMedoid.R +\name{kMedoid} +\alias{kMedoid} +\title{kMedoid} +\usage{ +kMedoid(idsInSet, score, maxK = 10) +} +\arguments{ +\item{idsInSet}{a list of sets of ids} + +\item{score}{a vector of scores for each set} + +\item{maxK}{maximum number of clusters} +} +\description{ +kMedoid clustering +} From 68c5ce1915bb1d9857b0e325990c1d9e93ee4a4c Mon Sep 17 00:00:00 2001 From: John Elizarraras <13990267+iblacksand@users.noreply.github.com> Date: Wed, 18 Oct 2023 11:04:16 -0500 Subject: [PATCH 41/82] Update issue templates --- .github/ISSUE_TEMPLATE/r-vs--rust-difference.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/ISSUE_TEMPLATE/r-vs--rust-difference.md b/.github/ISSUE_TEMPLATE/r-vs--rust-difference.md index bcbb982..38770d5 100644 --- a/.github/ISSUE_TEMPLATE/r-vs--rust-difference.md +++ b/.github/ISSUE_TEMPLATE/r-vs--rust-difference.md @@ -7,4 +7,6 @@ assignees: iblacksand --- + + Description of the result that you previously got in the original version, and the new result that you got. From e63364c56d7499bac0129551443b834ae6f60f1a Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Wed, 18 Oct 2023 16:04:25 -0500 Subject: [PATCH 42/82] update readme and rust version --- .gitignore | 2 ++ DESCRIPTION | 4 +++- README.md | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index c071546..953d576 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,5 @@ vignettes/*.pdf .Renviron .vscode .DS_Store +_site +index.md diff --git a/DESCRIPTION b/DESCRIPTION index 3964d14..57805c8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,6 +5,7 @@ Version: 0.4.6 Date: 2023-05-31 Authors@R: c( person("Jing", "Wang", email = "jingwang.uestc@gmail.com", role = "aut"), + person("John", "Elizarraras", email = "john.elizarraras@bcm.edu", role = c("aut","cre")), person("Yuxing", "Liao", email = "yuxingliao@gmail.com", role = c("aut", "cre")), person("Eric", "Jaehnig", email = "Eric.Jaehnig@bcm.edu", role = c("ctb")), person("Zhiao", "Shi", email = "Zhiao.Shi@bcm.edu", role = c("ctb")), @@ -22,5 +23,6 @@ Imports: methods, dplyr, doRNG, readr, parallel (>= 3.3.2), NeedsCompilation: yes LinkingTo: Rcpp RoxygenNote: 7.2.3 -SystemRequirements: Cargo (Rust's package manager), rustc v1.66.0+ +SystemRequirements: Cargo (Rust's package manager), rustc v1.63.0+ Config/rextendr/version: 0.3.1 +Encoding: UTF-8 diff --git a/README.md b/README.md index eebf430..26a7a08 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,11 @@ WebGestaltR function can perform popular enrichment analyses: ORA (Over-Represen ## Installation --- + #### Requirements - R (>= 4.0.0) -- Rust (>= 1.54.0) +- Rust (>= 1.63.0) --- From b4ec344cb9cd7447221220b4d36a2e9bccbe67a6 Mon Sep 17 00:00:00 2001 From: John Elizarraras <13990267+iblacksand@users.noreply.github.com> Date: Wed, 18 Oct 2023 17:01:55 -0500 Subject: [PATCH 43/82] update rust download link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 26a7a08..b4a0b67 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ WebGestaltR function can perform popular enrichment analyses: ORA (Over-Represen --- -Since WebGestaltR v1.0.0, Rust is used for core computations in the R package. Therefore, to install WebGestaltR, please download and install Rust from [https://www.rust-lang.org/learn/get-started](https://www.rust-lang.org/learn/get-started). For Mac, Linux, or Unix users, Rust can be installed from the command line, and Windows users can download a GUI installer. +Since WebGestaltR v1.0.0, Rust is used for core computations in the R package. Therefore, to install WebGestaltR, please download and install Rust from [https://www.rust-lang.org/tools/install](https://www.rust-lang.org/tools/install). For Mac, Linux, or Unix users, Rust can be installed from the command line, and Windows users can download a GUI installer. Make sure you restart your terminal after installing Rust to ensure the Rust compiler is available in your path. You can check that Rust is installed correctly by running `rustc --version` in your terminal. From 2b7ec3c471b2a21711835b58d3596fca07951343 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 19 Oct 2023 13:50:48 -0500 Subject: [PATCH 44/82] default to AHash and no weighted-set cover --- DESCRIPTION | 4 ++-- R/WebGestaltR.R | 2 +- R/WebGestaltRMultiOmics.R | 2 ++ man/WebGestaltR.Rd | 6 +++--- man/fill_input_data_frame.Rd | 3 +++ man/gsea_rust.Rd | 3 +++ man/ora_rust.Rd | 3 +++ src/rust/Cargo.lock | 23 +++++++++++++++++++++-- src/rust/Cargo.toml | 4 ++-- src/rust/src/lib.rs | 13 ++++++------- 10 files changed, 46 insertions(+), 17 deletions(-) create mode 100644 R/WebGestaltRMultiOmics.R diff --git a/DESCRIPTION b/DESCRIPTION index 57805c8..560a2d4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,8 +5,8 @@ Version: 0.4.6 Date: 2023-05-31 Authors@R: c( person("Jing", "Wang", email = "jingwang.uestc@gmail.com", role = "aut"), - person("John", "Elizarraras", email = "john.elizarraras@bcm.edu", role = c("aut","cre")), - person("Yuxing", "Liao", email = "yuxingliao@gmail.com", role = c("aut", "cre")), + person("John", "Elizarraras", email = "john.elizarraras@bcm.edu", role = c("aut", "cre")), + person("Yuxing", "Liao", email = "yuxingliao@gmail.com", role = c("aut")), person("Eric", "Jaehnig", email = "Eric.Jaehnig@bcm.edu", role = c("ctb")), person("Zhiao", "Shi", email = "Zhiao.Shi@bcm.edu", role = c("ctb")), person("Quanhu", "Sheng", email = "shengqh@gmail.com", role = c("ctb")) diff --git a/R/WebGestaltR.R b/R/WebGestaltR.R index ca2bbc7..cc6c887 100644 --- a/R/WebGestaltR.R +++ b/R/WebGestaltR.R @@ -195,7 +195,7 @@ #' networkConstructionMethod="Network_Retrieval_Prioritization") #' } #' -WebGestaltR <- function(omic_type = "single", enrichMethod="ORA", organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, interestGeneFile=NULL, interestGene=NULL, interestGeneType=NULL, interestGeneNames=NULL, collapseMethod="mean", referenceGeneFile=NULL, referenceGene=NULL, referenceGeneType=NULL, referenceSet=NULL, minNum=10, maxNum=500, sigMethod="fdr", fdrMethod="BH", fdrThr=0.05, topThr=10, reportNum=20, perNum=1000, gseaP=1, isOutput=TRUE, outputDirectory=getwd(), projectName=NULL, dagColor="continuous", saveRawGseaResult=FALSE, gseaPlotFormat=c("png", "svg"), setCoverNum=10, networkConstructionMethod=NULL, neighborNum=10, highlightType="Seeds", highlightSeedNum=10, nThreads=1, cache=NULL, hostName="https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10, ...) { +WebGestaltR <- function(omic_type = "single", enrichMethod="ORA", organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, interestGeneFile=NULL, interestGene=NULL, interestGeneType=NULL, interestGeneNames=NULL, collapseMethod="mean", referenceGeneFile=NULL, referenceGene=NULL, referenceGeneType=NULL, referenceSet=NULL, minNum=10, maxNum=500, sigMethod="fdr", fdrMethod="BH", fdrThr=0.05, topThr=10, reportNum=20, perNum=1000, gseaP=1, isOutput=TRUE, outputDirectory=getwd(), projectName=NULL, dagColor="continuous", saveRawGseaResult=FALSE, gseaPlotFormat=c("png", "svg"), setCoverNum=10, networkConstructionMethod=NULL, neighborNum=10, highlightType="Seeds", highlightSeedNum=10, nThreads=1, cache=NULL, hostName="https://www.webgestalt.org/", useWeightedSetCover = FALSE, useAffinityPropagation = FALSE, usekMedoid = TRUE, kMedoid_k = 25, ...) { extraArgs <- list(...) if ('keepGSEAFolder' %in% names(extraArgs) | 'keepGseaFolder' %in% names(extraArgs)) { warning("Parameter keepGSEAFolder is obsolete.\n") diff --git a/R/WebGestaltRMultiOmics.R b/R/WebGestaltRMultiOmics.R new file mode 100644 index 0000000..246b0fa --- /dev/null +++ b/R/WebGestaltRMultiOmics.R @@ -0,0 +1,2 @@ +WebGestaltRMultiOmics <- function(organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, collapseMethod = "mean", minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10, reportNum = 20, setCoverNum = 10, perNum = 1000, p = 1, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "binary", saveRawGseaResult = FALSE, plotFormat = "png", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10) { +} diff --git a/man/WebGestaltR.Rd b/man/WebGestaltR.Rd index cd6e5f8..0d84def 100644 --- a/man/WebGestaltR.Rd +++ b/man/WebGestaltR.Rd @@ -48,10 +48,10 @@ WebGestaltR( nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", - useWeightedSetCover = TRUE, + useWeightedSetCover = FALSE, useAffinityPropagation = FALSE, - usekMedoid = FALSE, - kMedoid_k = 10, + usekMedoid = TRUE, + kMedoid_k = 25, ... ) diff --git a/man/fill_input_data_frame.Rd b/man/fill_input_data_frame.Rd index 6c7301f..77ecc3d 100644 --- a/man/fill_input_data_frame.Rd +++ b/man/fill_input_data_frame.Rd @@ -3,6 +3,9 @@ \name{fill_input_data_frame} \alias{fill_input_data_frame} \title{Fill relation data frame for GSEA input} +\usage{ +fill_input_data_frame(gmt, genes, gene_sets) +} \arguments{ \item{gmt}{A Data Frame with geneSet and gene columns from the GMT file} diff --git a/man/gsea_rust.Rd b/man/gsea_rust.Rd index d23f581..0bed739 100644 --- a/man/gsea_rust.Rd +++ b/man/gsea_rust.Rd @@ -3,6 +3,9 @@ \name{gsea_rust} \alias{gsea_rust} \title{Run GSEA using rust library} +\usage{ +gsea_rust(min_overlap, max_overlap, permutations, sets, parts, analytes, ranks) +} \value{ List of the results of GSEA } diff --git a/man/ora_rust.Rd b/man/ora_rust.Rd index b4b6586..4c5ed95 100644 --- a/man/ora_rust.Rd +++ b/man/ora_rust.Rd @@ -3,6 +3,9 @@ \name{ora_rust} \alias{ora_rust} \title{Run ORA using Rust library} +\usage{ +ora_rust(sets, parts, interest, reference) +} \description{ Run ORA using Rust library } diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 70647e4..b3d33fc 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -6,12 +6,24 @@ version = 3 name = "WebGestaltR" version = "0.1.0" dependencies = [ + "ahash", "extendr-api", "ndarray", - "rustc-hash", "webgestalt_lib", ] +[[package]] +name = "ahash" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +dependencies = [ + "cfg-if", + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "approx" version = "0.5.1" @@ -479,6 +491,12 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -488,8 +506,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "webgestalt_lib" version = "0.1.0" -source = "git+https://github.com/bzhanglab/webgestalt_rust.git#18a01387157cfb4f1e41b0b86da3b36716b9ee8b" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git?branch=dev#b5869bef34715dab8bd57e6482844548fd5ad829" dependencies = [ + "ahash", "csv", "rand", "rayon", diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index fe1cd0a..ac11c7d 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -8,7 +8,7 @@ crate-type = ['staticlib'] name = 'WebGestaltR' [dependencies] +ahash = "0.8.3" extendr-api = { version = '0.6.0' } ndarray = "0.15.6" -rustc-hash = "1.1.0" -webgestalt_lib = { git = "https://github.com/bzhanglab/webgestalt_rust.git" } +webgestalt_lib = { git = "https://github.com/bzhanglab/webgestalt_rust.git", branch = "dev" } diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index f7ac8d7..0c5f2d2 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -1,8 +1,8 @@ use std::vec; +use ahash::{AHashMap, AHashSet}; use extendr_api::prelude::*; use ndarray::Array2; -use rustc_hash::{FxHashMap, FxHashSet}; use webgestalt_lib::{ methods::gsea::{GSEAConfig, RankListItem}, methods::ora::{get_ora, ORAConfig, ORAResult}, @@ -38,10 +38,9 @@ fn ora_rust(sets: Robj, parts: Robj, interest: Robj, reference: Robj) -> List { parts: parts_vec[i].clone(), }) } - let interest_set: FxHashSet = - FxHashSet::from_iter(interest.as_string_vector().unwrap()); - let reference_set: FxHashSet = - FxHashSet::from_iter(reference.as_string_vector().unwrap()); + let interest_set: AHashSet = AHashSet::from_iter(interest.as_string_vector().unwrap()); + let reference_set: AHashSet = + AHashSet::from_iter(reference.as_string_vector().unwrap()); let res: Vec = get_ora(&interest_set, &reference_set, gmt, config); let mut p: Vec = Vec::new(); let mut fdr: Vec = Vec::new(); @@ -170,8 +169,8 @@ pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { let genes_vec = genes.as_string_vector().unwrap(); let gene_set_vec = gene_sets.as_string_vector().unwrap(); let mut value_array = Array2::zeros((genes_vec.len(), gene_set_vec.len())); - let mut gene_index: FxHashMap<&String, usize> = FxHashMap::default(); - let mut set_index: FxHashMap<&String, usize> = FxHashMap::default(); + let mut gene_index: AHashMap<&String, usize> = AHashMap::default(); + let mut set_index: AHashMap<&String, usize> = AHashMap::default(); let gmt_set: Vec = gmt.index("geneSet").unwrap().as_string_vector().unwrap(); let gmt_gene: Vec = gmt.index("gene").unwrap().as_string_vector().unwrap(); for (i, val) in genes_vec.iter().enumerate() { From 54095948106cfcd3cb125af0b03c0ffa719ad735 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 24 Oct 2023 13:18:52 -0500 Subject: [PATCH 45/82] fix kMedoid output and make rust version use master branch --- .Rbuildignore | 2 ++ DESCRIPTION | 2 +- R/kMedoid.R | 11 ++++++----- dev_install | 14 ++++++++++++++ install | 5 ----- src/rust/Cargo.lock | 27 ++++++++++++++++++++++++--- src/rust/Cargo.toml | 2 +- src/rust/src/lib.rs | 1 + 8 files changed, 49 insertions(+), 15 deletions(-) create mode 100644 dev_install diff --git a/.Rbuildignore b/.Rbuildignore index a3f921f..f1fa43a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,5 @@ ^src/\.cargo$ ^.*\.Rproj$ ^\.Rproj\.user$ +^\.github$ +^\.lintr$ diff --git a/DESCRIPTION b/DESCRIPTION index 560a2d4..db38548 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,7 @@ Depends: R (>= 3.3) Imports: methods, dplyr, doRNG, readr, parallel (>= 3.3.2), doParallel (>= 1.0.10), foreach (>= 1.4.0), jsonlite, httr, rlang, svglite, - igraph, whisker, apcluster, Rcpp + igraph, whisker, apcluster, Rcpp, cluster, rextendr NeedsCompilation: yes LinkingTo: Rcpp RoxygenNote: 7.2.3 diff --git a/R/kMedoid.R b/R/kMedoid.R index 2a0de13..b1e6d5d 100644 --- a/R/kMedoid.R +++ b/R/kMedoid.R @@ -52,13 +52,14 @@ kMedoid <- function(idsInSet, score, maxK = 10) { kmRes <- pam(sim.mat, maxK, diss = TRUE, variant = "faster") # TODO: Make parameter for number of clusters. Currently set to 5. # sort clusters to make exemplar the first member - clusters <- vector(mode = "list", length(kmRes$medoids)) - if (length(kmRes$medoids) == 0) { + clusters <- vector(mode = "list", length(kmRes$id.med)) + if (length(kmRes$id.med) == 0) { return(NULL) } - for (i in 1:length(clusters)) { - clusters[[i]] <- kmRes$clustering[[i]][order(kmRes$clustering[[i]] == i, decreasing = TRUE)] + cluster_info <- kmRes$silinfo[["widths"]][, 1] + for (i in seq_along(kmRes$id.med)) { + clusters[[i]] <- names(cluster_info[cluster_info == i]) } # print(kmRes$medoids) - return(list(clusters = sapply(clusters, names), representatives = kmRes$medoids)) + return(list(clusters = clusters, representatives = kmRes$medoids)) } diff --git a/dev_install b/dev_install new file mode 100644 index 0000000..a2a19a9 --- /dev/null +++ b/dev_install @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +cd src/rust +cargo update +cd .. +cd .. +R -e "library(utils) +remove.packages('WebGestaltR') +q()" +echo "Old version removed" +R -e "library(devtools) +rextendr::document() +install('.') +q()" +echo "New version installed" diff --git a/install b/install index 709f7de..03b2203 100755 --- a/install +++ b/install @@ -1,9 +1,4 @@ #!/usr/bin/env bash -# python3 build_hash.py -cd src/rust -cargo update -cd .. -cd .. R -e "library(utils) remove.packages('WebGestaltR') q()" diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index b3d33fc..d8d5e39 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -14,14 +14,15 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +checksum = "cd7d5a2cecb58716e47d67d5703a249964b14c7be1ec3cad3affc295b2d1c35d" dependencies = [ "cfg-if", "getrandom", "once_cell", "version_check", + "zerocopy", ] [[package]] @@ -506,7 +507,7 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "webgestalt_lib" version = "0.1.0" -source = "git+https://github.com/bzhanglab/webgestalt_rust.git?branch=dev#b5869bef34715dab8bd57e6482844548fd5ad829" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git#64cf6bf4f7cb15b431199231b540b6118799eb9e" dependencies = [ "ahash", "csv", @@ -526,3 +527,23 @@ dependencies = [ "bytemuck", "safe_arch", ] + +[[package]] +name = "zerocopy" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8db0ac2df3d060f81ec0380ccc5b71c2a7c092cfced671feeee1320e95559c87" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b6093bc6d5265ff40b479c834cdd25d8e20784781a2a29a8106327393d0a9ff" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index ac11c7d..a375afd 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -11,4 +11,4 @@ name = 'WebGestaltR' ahash = "0.8.3" extendr-api = { version = '0.6.0' } ndarray = "0.15.6" -webgestalt_lib = { git = "https://github.com/bzhanglab/webgestalt_rust.git", branch = "dev" } +webgestalt_lib = { git = "https://github.com/bzhanglab/webgestalt_rust.git" } diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 0c5f2d2..4e832e5 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -8,6 +8,7 @@ use webgestalt_lib::{ methods::ora::{get_ora, ORAConfig, ORAResult}, readers::utils::Item, }; + /// Return string `"Hello world!"` to R. /// @export #[extendr] From 69d93f1230902f3007a6b13cbf13e65025e9d491 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 24 Oct 2023 16:25:28 -0500 Subject: [PATCH 46/82] make k-Medoid deterministic --- R/kMedoid.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/kMedoid.R b/R/kMedoid.R index b1e6d5d..d7a0771 100644 --- a/R/kMedoid.R +++ b/R/kMedoid.R @@ -49,7 +49,7 @@ kMedoid <- function(idsInSet, score, maxK = 10) { } # compute the k-medoid clustering - kmRes <- pam(sim.mat, maxK, diss = TRUE, variant = "faster") # TODO: Make parameter for number of clusters. Currently set to 5. + kmRes <- pam(sim.mat, maxK, diss = TRUE, variant = "faster", medoids = 1:maxK) # TODO: Make parameter for number of clusters. Currently set to 5. # sort clusters to make exemplar the first member clusters <- vector(mode = "list", length(kmRes$id.med)) From e7b252f07dd9f0fb23bf32e6ac799c54d7f17758 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Wed, 25 Oct 2023 15:51:03 -0500 Subject: [PATCH 47/82] use p.adjust instead of rust package for fdr methods --- R/oraEnrichment.R | 2 +- src/rust/Cargo.lock | 14 +++++++------- src/rust/src/lib.rs | 1 + 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/R/oraEnrichment.R b/R/oraEnrichment.R index 63fadb7..e0e6f39 100644 --- a/R/oraEnrichment.R +++ b/R/oraEnrichment.R @@ -36,7 +36,7 @@ oraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10, max genes <- tapply(geneSet$gene, geneSet$geneSet, rbind) rust_result <- ora_rust(names(genes), genes, interestGene, referenceGene) rust_result_df <- data.frame( - FDR = rust_result$fdr, pValue = rust_result$p, expect = rust_result$expect, + FDR = p.adjust(rust_result$p, method = fdrMethod), pValue = rust_result$p, expect = rust_result$expect, enrichmentRatio = rust_result$enrichment_ratio, geneSet = rust_result$gene_set, overlap = rust_result$overlap ) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index d8d5e39..473c478 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -14,9 +14,9 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd7d5a2cecb58716e47d67d5703a249964b14c7be1ec3cad3affc295b2d1c35d" +checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" dependencies = [ "cfg-if", "getrandom", @@ -507,7 +507,7 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "webgestalt_lib" version = "0.1.0" -source = "git+https://github.com/bzhanglab/webgestalt_rust.git#64cf6bf4f7cb15b431199231b540b6118799eb9e" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git#ce38f27edf237ceadba32f8951112362cff0ad29" dependencies = [ "ahash", "csv", @@ -530,18 +530,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.12" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8db0ac2df3d060f81ec0380ccc5b71c2a7c092cfced671feeee1320e95559c87" +checksum = "81ba595b9f2772fbee2312de30eeb80ec773b4cb2f1e8098db024afadda6c06f" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.12" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b6093bc6d5265ff40b479c834cdd25d8e20784781a2a29a8106327393d0a9ff" +checksum = "772666c41fb6dceaf520b564b962d738a8e1a83b41bd48945f50837aed78bb1d" dependencies = [ "proc-macro2", "quote", diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 4e832e5..f1e9692 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -22,6 +22,7 @@ fn rust_hello_world() -> &'static str { #[extendr] fn ora_rust(sets: Robj, parts: Robj, interest: Robj, reference: Robj) -> List { let config: ORAConfig = ORAConfig { + fdr_method: webgestalt_lib::stat::AdjustmentMethod::None, ..Default::default() }; let mut gmt: Vec = Vec::new(); From c8d3320f1bc4db434eae0b29ba9eed4f84d7b218 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 30 Oct 2023 10:28:50 -0500 Subject: [PATCH 48/82] fix GSEA gene set names being incorrectly format and not showing des names --- R/gseaEnrichment.R | 4 +++- R/readGmt.R | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/R/gseaEnrichment.R b/R/gseaEnrichment.R index a21f02d..1ce225e 100644 --- a/R/gseaEnrichment.R +++ b/R/gseaEnrichment.R @@ -129,9 +129,11 @@ plotEnrichmentPlot <- function(title, outputDir, fileName, format="png", running svglite(file.path(outputDir, paste0(sanitizeFileName(fileName), ".svg")), bg="transparent", width=7, height=7) cex <- list(main=1.5, axis=0.6, lab=0.8) # svg seems to have a problem with long title (figure margins too large) - if (nchar(title) > 80) { + if (!is.na(nchar(title))) { + if (nchar(title) > 80) { title = paste0(substr(title, 1, 80), "...") } + } } wrappedTitle <- strwrap(paste0("Enrichment plot: ", title), 60) plot.new() diff --git a/R/readGmt.R b/R/readGmt.R index 0a01fa6..25d9f07 100644 --- a/R/readGmt.R +++ b/R/readGmt.R @@ -69,6 +69,7 @@ prepareInputMatrixGsea <- function(rank, gmt) { # C++ implementation # rel <- fillInputDataFrame(gmt, genes, geneSets) rel <- as.data.frame(fill_input_data_frame(gmt, genes, geneSets)) + colnames(rel) <- geneSets rel$gene <- genes # R implementation # rel <- matrix(0, nrow=length(genes), ncol=length(geneSets), dimnames=list(genes, geneSets)) From 8a2cbbbfb8dee98738b5f0ccacd5b407ae3cb9e8 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 31 Oct 2023 16:17:37 -0500 Subject: [PATCH 49/82] add start of multiomics --- NAMESPACE | 1 - R/WebGestaltRBatch.R | 76 ++++++++++++++++--------------- R/WebGestaltRGsea.R | 8 +++- R/WebGestaltRMultiOmics.R | 93 ++++++++++++++++++++++++++++++++++++- R/extendr-wrappers.R | 4 -- R/gseaEnrichment.R | 2 +- man/rust_hello_world.Rd | 11 ----- src/rust/Cargo.lock | 18 ++++---- src/rust/Cargo.toml | 2 +- src/rust/src/lib.rs | 96 ++++++++++++++++++++++++++++++++++----- 10 files changed, 234 insertions(+), 77 deletions(-) delete mode 100644 man/rust_hello_world.Rd diff --git a/NAMESPACE b/NAMESPACE index 5c473b6..b2159f0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,7 +24,6 @@ export(ora_rust) export(prepareGseaInput) export(prepareInputMatrixGsea) export(readGmt) -export(rust_hello_world) export(swGsea) export(weightedSetCover) import(grDevices) diff --git a/R/WebGestaltRBatch.R b/R/WebGestaltRBatch.R index 91012a0..036888e 100644 --- a/R/WebGestaltRBatch.R +++ b/R/WebGestaltRBatch.R @@ -13,48 +13,50 @@ #' @aliases WebGestaltR_batch #' @rdname WebGestaltR #' -WebGestaltRBatch <- function(interestGeneFolder=NULL, enrichMethod="ORA", isParallel=FALSE, nThreads=3, ...) { - args <- list(...) - if(enrichMethod=="ORA" || enrichMethod=="NTA"){ - interestGeneFiles <- list.files(interestGeneFolder,pattern="\\.txt",full.names=TRUE) - } +WebGestaltRBatch <- function(interestGeneFolder = NULL, enrichMethod = "ORA", isParallel = FALSE, nThreads = 3, ...) { + args <- list(...) + if (enrichMethod == "ORA" || enrichMethod == "NTA") { + interestGeneFiles <- list.files(interestGeneFolder, pattern = "\\.txt", full.names = TRUE) + } - if(enrichMethod=="GSEA"){ - interestGeneFiles <- list.files(interestGeneFolder,pattern="\\.rnk",full.names=TRUE) - } + if (enrichMethod == "GSEA") { + interestGeneFiles <- list.files(interestGeneFolder, pattern = "\\.rnk", full.names = TRUE) + } - projectNames <- unlist(lapply(strsplit(basename(interestGeneFiles),split=".",fixed=TRUE),function(e){return(paste(e[-length(e)],collapse="."))})) + projectNames <- unlist(lapply(strsplit(basename(interestGeneFiles), split = ".", fixed = TRUE), function(e) { + return(paste(e[-length(e)], collapse = ".")) + })) - resultList <- list() + resultList <- list() - if(isParallel==TRUE){ - cl <- makeCluster(nThreads) - registerDoParallel(cl) - resultList <- foreach(i=1:length(interestGeneFiles), .packages="WebGestaltR") %dopar% { - args$interestGeneFile <- interestGeneFiles[i] - args$projectName <- projectNames[i] - args$enrichMethod <- enrichMethod - sig <- do.call(WebGestaltR, args) - re <- list(filename=interestGeneFiles[i], enrichResult=sig) - return(re) - } - stopCluster(cl) - }else{ - for(i in c(1:length(interestGeneFiles))){ - cat("Process file: ",interestGeneFiles[i],"\n",sep="") - args$interestGeneFile <- interestGeneFiles[i] - args$projectName <- projectNames[i] - args$enrichMethod <- enrichMethod - sig <- do.call(WebGestaltR, args) - re <- list(filename=interestGeneFiles[i], enrichResult=sig) - resultList[[i]] <- re - } - } - return(resultList) + if (isParallel == TRUE) { + cl <- makeCluster(nThreads) + registerDoParallel(cl) + resultList <- foreach(i = 1:length(interestGeneFiles), .packages = "WebGestaltR") %dopar% { + args$interestGeneFile <- interestGeneFiles[i] + args$projectName <- projectNames[i] + args$enrichMethod <- enrichMethod + sig <- do.call(WebGestaltR, args) + re <- list(filename = interestGeneFiles[i], enrichResult = sig) + return(re) + } + stopCluster(cl) + } else { + for (i in c(1:length(interestGeneFiles))) { + cat("Process file: ", interestGeneFiles[i], "\n", sep = "") + args$interestGeneFile <- interestGeneFiles[i] + args$projectName <- projectNames[i] + args$enrichMethod <- enrichMethod + sig <- do.call(WebGestaltR, args) + re <- list(filename = interestGeneFiles[i], enrichResult = sig) + resultList[[i]] <- re + } + } + return(resultList) } #' @export -WebGestaltR_batch <- function(is.output=TRUE, ...) { - warning("Function WebGestaltR_batch is deprecated and changed to WebGestaltRBatch!\n") - return(WebGestaltRBatch(isOutput=is.output, ...)) +WebGestaltR_batch <- function(is.output = TRUE, ...) { + warning("Function WebGestaltR_batch is deprecated and changed to WebGestaltRBatch!\n") + return(WebGestaltRBatch(isOutput = is.output, ...)) } diff --git a/R/WebGestaltRGsea.R b/R/WebGestaltRGsea.R index bbdea58..35f0225 100644 --- a/R/WebGestaltRGsea.R +++ b/R/WebGestaltRGsea.R @@ -1,6 +1,12 @@ #' @importFrom dplyr select distinct left_join arrange %>% mutate #' @importFrom readr write_tsv -WebGestaltRGsea <- function(organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, interestGeneFile=NULL, interestGene=NULL, interestGeneType=NULL, collapseMethod="mean", minNum=10, maxNum=500, fdrMethod="BH", sigMethod="fdr", fdrThr=0.05, topThr=10, reportNum=20, setCoverNum=10, perNum=1000, p=1, isOutput=TRUE, outputDirectory=getwd(), projectName=NULL, dagColor="binary", saveRawGseaResult=FALSE, plotFormat="png", nThreads=1, cache=NULL, hostName="https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10) { +WebGestaltRGsea <- function(organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, + enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, + interestGeneType = NULL, collapseMethod = "mean", minNum = 10, maxNum = 500, fdrMethod = "BH", + sigMethod = "fdr", fdrThr = 0.05, topThr = 10, reportNum = 20, setCoverNum = 10, perNum = 1000, p = 1, + isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "binary", saveRawGseaResult = FALSE, + plotFormat = "png", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, + useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10) { enrichMethod <- "GSEA" projectDir <- file.path(outputDirectory, paste0("Project_", projectName)) diff --git a/R/WebGestaltRMultiOmics.R b/R/WebGestaltRMultiOmics.R index 246b0fa..10c9abe 100644 --- a/R/WebGestaltRMultiOmics.R +++ b/R/WebGestaltRMultiOmics.R @@ -1,2 +1,93 @@ -WebGestaltRMultiOmics <- function(organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, collapseMethod = "mean", minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10, reportNum = 20, setCoverNum = 10, perNum = 1000, p = 1, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "binary", saveRawGseaResult = FALSE, plotFormat = "png", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10) { +WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, analyteTypes = NULL, enrichMethod = "ORA", organism = "hsapiens", enrichDatabase = NULL, + enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, + collapseMethod = "mean", minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, + topThr = 10, reportNum = 20, setCoverNum = 10, perNum = 1000, p = 1, isOutput = TRUE, outputDirectory = getwd(), + projectName = NULL, dagColor = "binary", saveRawGseaResult = FALSE, plotFormat = "png", nThreads = 1, cache = NULL, + hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, + usekMedoid = FALSE, kMedoid_k = 10, isMetaAnalysis = TRUE, mergeMethod = "mean", normalizationMethod = "rank", + referenceLists = NULL, referenceTypes = NULL) { + VALID_MERGE_METHODS <- c("mean", "max") + VALID_NORM_METHODS <- c("rank", "median", "mean") + VALID_ENRICH_METHODS <- c("ORA", "GSEA") + + # Null Check + analyteLists <- testNull(analyteLists) + analyteListFiles <- testNull(analyteListFiles) + analyteTypes <- testNull(analyteTypes) + enrichMethod <- testNull(enrichMethod) + organism <- testNull(organism) + enrichDatabase <- testNull(enrichDatabase) + enrichDatabaseFile <- testNull(enrichDatabaseFile) + enrichDatabaseType <- testNull(enrichDatabaseType) + enrichDatabaseDescriptionFile <- testNull(enrichDatabaseDescriptionFile) + referenceLists <- testNull(referenceLists) + referenceTypes <- testNull(referenceTypes) + error_msg <- parameterErrorMessage( + enrichMethod = enrichMethod, organism = organism, collapseMethod = collapseMethod, minNum = minNum, maxNum = maxNum, + fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr, reportNum = reportNum, isOutput = isOutput, + outputDirectory = outputDirectory, dagColor = dagColor, hostName = hostName, cache = cache + ) + if (!is.null(error_msg)) { + stop(error_msg) + } + + # Verify parameters + mergeMethod <- tolower(mergeMethod) + normalizationMethod <- tolower(normalizationMethod) + enrichMethod <- toupper(enrichMethod) + if (enrichMethod == "ORA" && !isMetaAnalysis) { + stop("ORA only supports meta-analysis. isMetaAnalysis must be set to TRUE") + } + if (!(mergeMethod %in% VALID_MERGE_METHODS)) { + stop(paste0(mergeMethod, " is not a valid merge method.\nValid options are: ", paste(VALID_MERGE_METHODS, collapse = ", "))) + } + if (!(normalizationMethod %in% VALID_NORM_METHODS)) { + stop(paste0(normalizationMethod, " is not a valid normalization method.\nValid options are: ", paste(VALID_NORM_METHODS, collapse = ", "))) + } + if (length(analyteLists) != length(analyteTypes) && length(analyteListFiles) != length(analyteTypes)) { + stop("analyte lists and analyteTypes must be the same length.") + } + if (!(enrichMethod %in% VALID_ENRICH_METHODS)) { + stop(paste0(enrichMethod, " is not a valid enrichment method for multiomics.\nValid options are: ", paste(VALID_ENRICH_METHODS, collapse = ", "))) + } + if (length(enrichDatabase) > 1 || length(enrichDatabaseFile) > 1) { + stop("Only one enrichDatabase or enrichDatabaseFile can be specified for multiomics.") + } + + if (enrichMethod == "ORA") { + cat("Performing multi-omics ORA\nLoading the functional categories...\n") + enrichD <- loadGeneSet( + organism = organism, enrichDatabase = enrichDatabase, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, + enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, cache = cache, hostName = hostName + ) + + geneSet <- enrichD$geneSet + geneSetDes <- enrichD$geneSetDes + geneSetDag <- enrichD$geneSetDag + geneSetNet <- enrichD$geneSetNet + databaseStandardId <- enrichD$standardId + rm(enrichD) + + cat("Loading the ID lists...\n") + interest_lists <- list() + if (is.null(analyteLists)) { + for (i in analyteLists) { + interestingGeneMap <- loadInterestGene( + organism = organism, dataType = "list", inputGeneFile = analyteListFiles[i], inputGene = NULL, + geneType = analyteTypes[i], collapseMethod = collapseMethod, cache = cache, + hostName = hostName, geneSet = geneSet + ) + if (organism == "others") { + interestGeneList <- unique(interestingGeneMap) + interest_lists[[i]] <- interestGeneList + } else { + interestStandardId <- interestingGeneMap$standardId + interestGeneList <- unique(interestingGeneMap$mapped[[interestStandardId]]) + interest_lists[[i]] <- interestGeneList + } + } + } + + ## Meta-analysis + } } diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index cd262d0..5ae5a1e 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -11,10 +11,6 @@ #' @useDynLib WebGestaltR, .registration = TRUE NULL -#' Return string `"Hello world!"` to R. -#' @export -rust_hello_world <- function() .Call(wrap__rust_hello_world) - #' Fill relation data frame for GSEA input #' #' Fill 1 for gene in gene set diff --git a/R/gseaEnrichment.R b/R/gseaEnrichment.R index 1ce225e..cd8c0c1 100644 --- a/R/gseaEnrichment.R +++ b/R/gseaEnrichment.R @@ -1,6 +1,6 @@ #' @importFrom dplyr select distinct filter arrange mutate left_join %>% #' @importFrom readr write_tsv -gseaEnrichment <- function (hostName, outputDirectory, projectName, geneRankList, geneSet, geneSetDes=NULL, collapseMethod="mean", minNum=10, maxNum=500, sigMethod="fdr", fdrThr=0.05, topThr=10, perNum=1000, p=1, isOutput=TRUE, saveRawGseaResult=FALSE, plotFormat="png", nThreads=1) { +gseaEnrichment <- function(hostName, outputDirectory, projectName, geneRankList, geneSet, geneSetDes=NULL, collapseMethod="mean", minNum=10, maxNum=500, sigMethod="fdr", fdrThr=0.05, topThr=10, perNum=1000, p=1, isOutput=TRUE, saveRawGseaResult=FALSE, plotFormat="png", nThreads=1) { projectFolder <- file.path(outputDirectory, paste("Project_", projectName, sep="")) if (!dir.exists(projectFolder)) { dir.create(projectFolder) diff --git a/man/rust_hello_world.Rd b/man/rust_hello_world.Rd deleted file mode 100644 index fe92dd0..0000000 --- a/man/rust_hello_world.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/extendr-wrappers.R -\name{rust_hello_world} -\alias{rust_hello_world} -\title{Return string `"Hello world!"` to R.} -\usage{ -rust_hello_world() -} -\description{ -Return string `"Hello world!"` to R. -} diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 473c478..da40a7f 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -414,18 +414,18 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" -version = "1.0.189" +version = "1.0.190" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e422a44e74ad4001bdc8eede9a4570ab52f71190e9c076d14369f38b9200537" +checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.189" +version = "1.0.190" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e48d1f918009ce3145511378cf68d613e3b3d9137d67272562080d68a2b32d5" +checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3" dependencies = [ "proc-macro2", "quote", @@ -507,7 +507,7 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "webgestalt_lib" version = "0.1.0" -source = "git+https://github.com/bzhanglab/webgestalt_rust.git#ce38f27edf237ceadba32f8951112362cff0ad29" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git#963575b5054366a9c8730142b60a9dc53527ee19" dependencies = [ "ahash", "csv", @@ -530,18 +530,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.15" +version = "0.7.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81ba595b9f2772fbee2312de30eeb80ec773b4cb2f1e8098db024afadda6c06f" +checksum = "dd66a62464e3ffd4e37bd09950c2b9dd6c4f8767380fabba0d523f9a775bc85a" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.15" +version = "0.7.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "772666c41fb6dceaf520b564b962d738a8e1a83b41bd48945f50837aed78bb1d" +checksum = "255c4596d41e6916ced49cfafea18727b24d67878fa180ddfd69b9df34fd1726" dependencies = [ "proc-macro2", "quote", diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index a375afd..4dff1e8 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -8,7 +8,7 @@ crate-type = ['staticlib'] name = 'WebGestaltR' [dependencies] +webgestalt_lib = { git = "https://github.com/bzhanglab/webgestalt_rust.git" } ahash = "0.8.3" extendr-api = { version = '0.6.0' } ndarray = "0.15.6" -webgestalt_lib = { git = "https://github.com/bzhanglab/webgestalt_rust.git" } diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index f1e9692..a4931a3 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -5,17 +5,13 @@ use extendr_api::prelude::*; use ndarray::Array2; use webgestalt_lib::{ methods::gsea::{GSEAConfig, RankListItem}, - methods::ora::{get_ora, ORAConfig, ORAResult}, + methods::{ + multiomics::{multiomic_ora, ORAJob}, + ora::{get_ora, ORAConfig, ORAResult}, + }, readers::utils::Item, }; -/// Return string `"Hello world!"` to R. -/// @export -#[extendr] -fn rust_hello_world() -> &'static str { - "Hello world!" -} - /// Run ORA using Rust library /// @name ora_rust /// @export @@ -113,7 +109,7 @@ fn gsea_rust( rank: ranks_vec[i], }) } - let res = webgestalt_lib::methods::gsea::gsea(analyte_list, gmt, config, None); // TODO: Convert dataframe to GMT + let res = webgestalt_lib::methods::gsea::gsea(analyte_list, gmt, config, None); let mut fdr: Vec = Vec::new(); let mut p: Vec = Vec::new(); let mut leading_edge: Vec = Vec::new(); @@ -190,7 +186,85 @@ pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { } // Construct DataFrame in R. Create list for now. List::from_names_and_values(gene_set_vec, gene_set_val).unwrap() - // data_frame!(x = 1) +} + +#[extendr] +pub fn rust_multiomics_ora( + sets: Robj, + parts: Robj, + interest: Robj, + reference: Robj, + method: Robj, +) -> List { + let config: ORAConfig = ORAConfig { + fdr_method: webgestalt_lib::stat::AdjustmentMethod::None, + ..Default::default() + }; + let reference_list = reference.as_list().unwrap(); + let method = match method.as_str().unwrap() { + "fisher" => webgestalt_lib::methods::multiomics::MultiOmicsMethod::Meta( + webgestalt_lib::methods::multiomics::MetaAnalysisMethod::Fisher, + ), + _ => webgestalt_lib::methods::multiomics::MultiOmicsMethod::Meta( + webgestalt_lib::methods::multiomics::MetaAnalysisMethod::Stouffer, + ), + }; + let mut gmt: Vec = Vec::new(); + let set_vec = sets.as_str_vector().unwrap(); + let parts_vec: Vec> = parts + .as_list() + .unwrap() + .iter() + .map(|(_, x)| x.as_string_vector().unwrap()) + .collect(); + for (i, set) in set_vec.iter().enumerate() { + gmt.push(Item { + id: set.to_string(), + url: String::default(), + parts: parts_vec[i].clone(), + }) + } + let mut jobs: Vec = Vec::new(); + for (i, (_, list)) in interest.as_list().unwrap().into_iter().enumerate() { + let interest_set: AHashSet = AHashSet::from_iter(list.as_string_vector().unwrap()); + let reference_set: AHashSet = + AHashSet::from_iter(reference_list[i].as_string_vector().unwrap()); + let job = ORAJob { + gmt: gmt.clone(), + interest_list: interest_set.clone(), + reference_list: reference_set.clone(), + config: config.clone(), + }; + jobs.push(job) + } + + let res: Vec> = multiomic_ora(jobs, method); + let mut all_res: Vec = Vec::new(); + for analysis in res { + let mut p: Vec = Vec::new(); + let mut fdr: Vec = Vec::new(); + let mut expect: Vec = Vec::new(); + let mut enrichment_ratio: Vec = Vec::new(); + let mut overlap: Vec = Vec::new(); + let mut gene_set: Vec = Vec::new(); + for row in analysis { + gene_set.push(row.set); + p.push(row.p); + fdr.push(row.fdr); + expect.push(row.expected); + overlap.push(row.overlap); + enrichment_ratio.push(row.enrichment_ratio); + } + all_res.push(list!( + p = p, + gene_set = gene_set, + fdr = fdr, + expect = expect, + overlap = overlap, + enrichment_ratio = enrichment_ratio + )); + } + List::from_values(all_res) } // Macro to generate exports. @@ -198,8 +272,8 @@ pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { // See corresponding C code in `entrypoint.c`. extendr_module! { mod WebGestaltR; - fn rust_hello_world; fn fill_input_data_frame; fn gsea_rust; fn ora_rust; + fn rust_multiomics_ora; } From 44b08a4da82b35ed7547f4127817cc523f576c7b Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 2 Nov 2023 11:04:20 -0500 Subject: [PATCH 50/82] add multiomics GSEA --- src/rust/src/lib.rs | 295 ++++++++++++++++++++++++++++++-------------- 1 file changed, 201 insertions(+), 94 deletions(-) diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index a4931a3..360cd84 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -6,12 +6,64 @@ use ndarray::Array2; use webgestalt_lib::{ methods::gsea::{GSEAConfig, RankListItem}, methods::{ - multiomics::{multiomic_ora, ORAJob}, + gsea::FullGSEAResult, + multiomics::{multiomic_gsea, multiomic_ora, GSEAJob, ORAJob}, ora::{get_ora, ORAConfig, ORAResult}, }, readers::utils::Item, }; +/// Fill relation data frame for GSEA input +/// +/// Fill 1 for gene in gene set +/// +/// See https://github.com/extendr/extendr/issues/612 for how to export DataFrame +/// +/// ## Diagram +/// ```shell +/// Gene Sets +/// ┌───────────┐ First column named 'gene' containing gene name +/// │A0100110100│ 1 = in set +/// Genes │B0100101000│ 0 = not in set +/// │C1011101001│ Due to limitiations with extendr-api v 0.6.0, +/// └───────────┘ function returns a list, and the R package will +/// add the first 'gene' column +/// ``` +/// @param gmt A Data Frame with geneSet and gene columns from the GMT file +/// @param genes A vector of genes +/// @param gene_sets A vector of gene sets +/// +/// @return A Data Frame with the first column of gene and 1 or 0 for other columns of gene sets. +/// @author John Elizarraras +/// @name fill_input_data_frame +/// @keywords internal +/// @export +#[extendr] +pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { + let genes_vec = genes.as_string_vector().unwrap(); + let gene_set_vec = gene_sets.as_string_vector().unwrap(); + let mut value_array = Array2::zeros((genes_vec.len(), gene_set_vec.len())); + let mut gene_index: AHashMap<&String, usize> = AHashMap::default(); + let mut set_index: AHashMap<&String, usize> = AHashMap::default(); + let gmt_set: Vec = gmt.index("geneSet").unwrap().as_string_vector().unwrap(); + let gmt_gene: Vec = gmt.index("gene").unwrap().as_string_vector().unwrap(); + for (i, val) in genes_vec.iter().enumerate() { + gene_index.insert(val, i); + } + for (i, val) in gene_set_vec.iter().enumerate() { + set_index.insert(val, i); + } + for i in 0..gmt_set.len() { + value_array[[gene_index[&gmt_gene[i]], set_index[&gmt_set[i]]]] = 1; + } + let mut gene_set_val: Vec> = Vec::new(); + for i in 0..value_array.len_of(ndarray::Axis(1)) { + gene_set_val.push(value_array.column(i).to_vec()) + } + // Construct DataFrame in R. Create list for now. + List::from_names_and_values(gene_set_vec, gene_set_val).unwrap() +} + /// Run ORA using Rust library /// @name ora_rust /// @export @@ -64,6 +116,85 @@ fn ora_rust(sets: Robj, parts: Robj, interest: Robj, reference: Robj) -> List { ) } +#[extendr] +pub fn rust_multiomics_ora( + sets: Robj, + parts: Robj, + interest: Robj, + reference: Robj, + method: Robj, +) -> List { + let config: ORAConfig = ORAConfig { + fdr_method: webgestalt_lib::stat::AdjustmentMethod::None, + ..Default::default() + }; + let reference_list = reference.as_list().unwrap(); + let method = match method.as_str().unwrap() { + "fisher" => webgestalt_lib::methods::multiomics::MultiOmicsMethod::Meta( + webgestalt_lib::methods::multiomics::MetaAnalysisMethod::Fisher, + ), + _ => webgestalt_lib::methods::multiomics::MultiOmicsMethod::Meta( + webgestalt_lib::methods::multiomics::MetaAnalysisMethod::Stouffer, + ), + }; + let mut gmt: Vec = Vec::new(); + let set_vec = sets.as_str_vector().unwrap(); + let parts_vec: Vec> = parts + .as_list() + .unwrap() + .iter() + .map(|(_, x)| x.as_string_vector().unwrap()) + .collect(); + for (i, set) in set_vec.iter().enumerate() { + gmt.push(Item { + id: set.to_string(), + url: String::default(), + parts: parts_vec[i].clone(), + }) + } + let mut jobs: Vec = Vec::new(); + for (i, (_, list)) in interest.as_list().unwrap().into_iter().enumerate() { + let interest_set: AHashSet = AHashSet::from_iter(list.as_string_vector().unwrap()); + let reference_set: AHashSet = + AHashSet::from_iter(reference_list[i].as_string_vector().unwrap()); + let job = ORAJob { + gmt: gmt.clone(), + interest_list: interest_set.clone(), + reference_list: reference_set.clone(), + config: config.clone(), + }; + jobs.push(job) + } + + let res: Vec> = multiomic_ora(jobs, method); + let mut all_res: Vec = Vec::new(); + for analysis in res { + let mut p: Vec = Vec::new(); + let mut fdr: Vec = Vec::new(); + let mut expect: Vec = Vec::new(); + let mut enrichment_ratio: Vec = Vec::new(); + let mut overlap: Vec = Vec::new(); + let mut gene_set: Vec = Vec::new(); + for row in analysis { + gene_set.push(row.set); + p.push(row.p); + fdr.push(row.fdr); + expect.push(row.expected); + overlap.push(row.overlap); + enrichment_ratio.push(row.enrichment_ratio); + } + all_res.push(list!( + p = p, + gene_set = gene_set, + fdr = fdr, + expect = expect, + overlap = overlap, + enrichment_ratio = enrichment_ratio + )); + } + List::from_values(all_res) +} + /// Run GSEA using rust library /// @return List of the results of GSEA /// @name gsea_rust @@ -137,77 +268,45 @@ fn gsea_rust( ) } -/// Fill relation data frame for GSEA input -/// -/// Fill 1 for gene in gene set -/// -/// See https://github.com/extendr/extendr/issues/612 for how to export DataFrame -/// -/// ## Diagram -/// ```shell -/// Gene Sets -/// ┌───────────┐ First column named 'gene' containing gene name -/// │A0100110100│ 1 = in set -/// Genes │B0100101000│ 0 = not in set -/// │C1011101001│ Due to limitiations with extendr-api v 0.6.0, -/// └───────────┘ function returns a list, and the R package will -/// add the first 'gene' column -/// ``` -/// @param gmt A Data Frame with geneSet and gene columns from the GMT file -/// @param genes A vector of genes -/// @param gene_sets A vector of gene sets -/// -/// @return A Data Frame with the first column of gene and 1 or 0 for other columns of gene sets. -/// @author John Elizarraras -/// @name fill_input_data_frame -/// @keywords internal -/// @export #[extendr] -pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { - let genes_vec = genes.as_string_vector().unwrap(); - let gene_set_vec = gene_sets.as_string_vector().unwrap(); - let mut value_array = Array2::zeros((genes_vec.len(), gene_set_vec.len())); - let mut gene_index: AHashMap<&String, usize> = AHashMap::default(); - let mut set_index: AHashMap<&String, usize> = AHashMap::default(); - let gmt_set: Vec = gmt.index("geneSet").unwrap().as_string_vector().unwrap(); - let gmt_gene: Vec = gmt.index("gene").unwrap().as_string_vector().unwrap(); - for (i, val) in genes_vec.iter().enumerate() { - gene_index.insert(val, i); - } - for (i, val) in gene_set_vec.iter().enumerate() { - set_index.insert(val, i); - } - for i in 0..gmt_set.len() { - value_array[[gene_index[&gmt_gene[i]], set_index[&gmt_set[i]]]] = 1; - } - let mut gene_set_val: Vec> = Vec::new(); - for i in 0..value_array.len_of(ndarray::Axis(1)) { - gene_set_val.push(value_array.column(i).to_vec()) - } - // Construct DataFrame in R. Create list for now. - List::from_names_and_values(gene_set_vec, gene_set_val).unwrap() -} - -#[extendr] -pub fn rust_multiomics_ora( +pub fn rust_multiomics_gsea( + min_overlap: Robj, + max_overlap: Robj, + permutations: Robj, sets: Robj, parts: Robj, - interest: Robj, - reference: Robj, - method: Robj, + analytes: Robj, + ranks: Robj, + method_modifier: Robj, + combo_method: Robj, ) -> List { - let config: ORAConfig = ORAConfig { - fdr_method: webgestalt_lib::stat::AdjustmentMethod::None, + let config = GSEAConfig { + min_overlap: min_overlap.as_real().unwrap() as i32, + max_overlap: max_overlap.as_real().unwrap() as i32, + permutations: permutations.as_real().unwrap() as i32, ..Default::default() }; - let reference_list = reference.as_list().unwrap(); - let method = match method.as_str().unwrap() { - "fisher" => webgestalt_lib::methods::multiomics::MultiOmicsMethod::Meta( - webgestalt_lib::methods::multiomics::MetaAnalysisMethod::Fisher, - ), - _ => webgestalt_lib::methods::multiomics::MultiOmicsMethod::Meta( - webgestalt_lib::methods::multiomics::MetaAnalysisMethod::Stouffer, - ), + let method = if combo_method.as_str().unwrap() == "meta" { + match method_modifier.as_str().unwrap() { + "fisher" => webgestalt_lib::methods::multiomics::MultiOmicsMethod::Meta( + webgestalt_lib::methods::multiomics::MetaAnalysisMethod::Fisher, + ), + _ => webgestalt_lib::methods::multiomics::MultiOmicsMethod::Meta( + webgestalt_lib::methods::multiomics::MetaAnalysisMethod::Stouffer, + ), + } + } else { + let norm = match method_modifier.as_str().unwrap() { + "mean" => webgestalt_lib::methods::multiomics::NormalizationMethod::MeanValue, + "median" => webgestalt_lib::methods::multiomics::NormalizationMethod::MedianValue, + "rank" => webgestalt_lib::methods::multiomics::NormalizationMethod::MedianRank, + _ => webgestalt_lib::methods::multiomics::NormalizationMethod::None, + }; + match combo_method.as_str().unwrap() { + "max" => webgestalt_lib::methods::multiomics::MultiOmicsMethod::Max(norm), + "mean" => webgestalt_lib::methods::multiomics::MultiOmicsMethod::Mean(norm), + _ => webgestalt_lib::methods::multiomics::MultiOmicsMethod::Mean(norm), + } }; let mut gmt: Vec = Vec::new(); let set_vec = sets.as_str_vector().unwrap(); @@ -222,47 +321,55 @@ pub fn rust_multiomics_ora( id: set.to_string(), url: String::default(), parts: parts_vec[i].clone(), - }) + }); } - let mut jobs: Vec = Vec::new(); - for (i, (_, list)) in interest.as_list().unwrap().into_iter().enumerate() { - let interest_set: AHashSet = AHashSet::from_iter(list.as_string_vector().unwrap()); - let reference_set: AHashSet = - AHashSet::from_iter(reference_list[i].as_string_vector().unwrap()); - let job = ORAJob { + let rank_list = ranks.as_list().unwrap(); + let mut jobs: Vec = Vec::new(); + for (i, (_, list)) in analytes.as_list().unwrap().into_iter().enumerate() { + let mut analyte_list: Vec = Vec::new(); + let analyte_vec: Vec<&str> = list.as_str_vector().unwrap(); + let ranks_vec: Vec = rank_list[i].as_real_vector().unwrap(); + for (i, analyte) in analyte_vec.iter().enumerate() { + analyte_list.push(RankListItem { + analyte: analyte.to_string(), + rank: ranks_vec[i], + }) + } + let job = GSEAJob { gmt: gmt.clone(), - interest_list: interest_set.clone(), - reference_list: reference_set.clone(), + rank_list: analyte_list, config: config.clone(), }; - jobs.push(job) + jobs.push(job); } - - let res: Vec> = multiomic_ora(jobs, method); + let res: Vec> = multiomic_gsea(jobs, method); let mut all_res: Vec = Vec::new(); for analysis in res { - let mut p: Vec = Vec::new(); let mut fdr: Vec = Vec::new(); - let mut expect: Vec = Vec::new(); - let mut enrichment_ratio: Vec = Vec::new(); - let mut overlap: Vec = Vec::new(); - let mut gene_set: Vec = Vec::new(); + let mut p: Vec = Vec::new(); + let mut leading_edge: Vec = Vec::new(); + let mut gene_sets: Vec = Vec::new(); + let mut es: Vec = Vec::new(); + let mut nes: Vec = Vec::new(); + let mut running_sum: Vec = Vec::new(); for row in analysis { - gene_set.push(row.set); - p.push(row.p); fdr.push(row.fdr); - expect.push(row.expected); - overlap.push(row.overlap); - enrichment_ratio.push(row.enrichment_ratio); + p.push(row.p); + leading_edge.push(row.leading_edge); + gene_sets.push(row.set); + es.push(row.es); + nes.push(row.nes); + running_sum.push(Robj::from(row.running_sum)); } all_res.push(list!( - p = p, - gene_set = gene_set, fdr = fdr, - expect = expect, - overlap = overlap, - enrichment_ratio = enrichment_ratio - )); + p_val = p, + ES = es, + NES = nes, + leading_edge = leading_edge, + gene_sets = gene_sets.clone(), + running_sum = List::from_names_and_values(gene_sets, running_sum), + )) } List::from_values(all_res) } From ce5e7dc12a741c5b21c26276ba41aeb1e526b69d Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 2 Nov 2023 11:04:39 -0500 Subject: [PATCH 51/82] add start of combination of GMT files --- R/WebGestaltRMultiOmics.R | 85 +++++++++++++++++++++++++++++++++------ R/WebGestaltROra.R | 17 +++++++- R/idMapping.R | 82 ++++++++++++++++++------------------- R/multiOmicsUtils.R | 14 +++++++ README.md | 2 +- build_hash.py | 1 - 6 files changed, 143 insertions(+), 58 deletions(-) create mode 100644 R/multiOmicsUtils.R diff --git a/R/WebGestaltRMultiOmics.R b/R/WebGestaltRMultiOmics.R index 10c9abe..e196770 100644 --- a/R/WebGestaltRMultiOmics.R +++ b/R/WebGestaltRMultiOmics.R @@ -1,5 +1,17 @@ +#' @title WebGestaltRMultiOmics +#' @description Perform multi-omics analysis using WebGestaltR +#' @param analyteLists A list of analyte lists +#' @param analyteListFiles A list of analyte list files +#' @param analyteTypes A list of analyte types +#' @param enrichMethod Enrichment method, either \code{ORA} or \code{GSEA} +#' @param organism The organism to use +#' @param enrichDatabase The database to use +#' @param enrichDatabaseFile The database file to use +#' @param enrichDatabaseType The database type to use +#' @param enrichDatabaseDescriptionFile The database description file to use +#' @export WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, analyteTypes = NULL, enrichMethod = "ORA", organism = "hsapiens", enrichDatabase = NULL, - enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, + enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, collapseMethod = "mean", minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10, reportNum = 20, setCoverNum = 10, perNum = 1000, p = 1, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "binary", saveRawGseaResult = FALSE, plotFormat = "png", nThreads = 1, cache = NULL, @@ -14,14 +26,14 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, analyteLists <- testNull(analyteLists) analyteListFiles <- testNull(analyteListFiles) analyteTypes <- testNull(analyteTypes) + referenceLists <- testNull(referenceLists) + referenceTypes <- testNull(referenceTypes) enrichMethod <- testNull(enrichMethod) organism <- testNull(organism) enrichDatabase <- testNull(enrichDatabase) enrichDatabaseFile <- testNull(enrichDatabaseFile) enrichDatabaseType <- testNull(enrichDatabaseType) enrichDatabaseDescriptionFile <- testNull(enrichDatabaseDescriptionFile) - referenceLists <- testNull(referenceLists) - referenceTypes <- testNull(referenceTypes) error_msg <- parameterErrorMessage( enrichMethod = enrichMethod, organism = organism, collapseMethod = collapseMethod, minNum = minNum, maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr, reportNum = reportNum, isOutput = isOutput, @@ -56,22 +68,52 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, if (enrichMethod == "ORA") { cat("Performing multi-omics ORA\nLoading the functional categories...\n") - enrichD <- loadGeneSet( - organism = organism, enrichDatabase = enrichDatabase, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, + databases <- c() + if (!is.null(enrichDatabase)) { # Need to get correct name for metabolite databases + if (length(unique(analyteTypes)) == 1) { + databases <- enrichDatabase + } else { + types_processed <- c() + for (i in seq_along(analyteTypes)) { + if (analyteTypes[i] %in% types_processed) { + next + } + databases <- c(databases, get_gmt_file(hostName, analyteTypes[i], enrichDatabase[i], organism, cache)) + types_processed <- c(types_processed, analyteTypes[i]) + } + databases <- unique(databases) + } + } else { + databases <- NULL + } + all_sets <- loadGeneSet( + organism = organism, enrichDatabase = databases, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, cache = cache, hostName = hostName ) + if (length(all_sets) > 1) { + geneSet <- all_sets[[1]]$geneSet + geneSetDes <- all_sets[[1]]$geneSetDes + geneSetNet <- all_sets[[1]]$geneSetNet + for (i in 2:length(all_sets)) { + geneSet <- rbind(geneSet, all_sets[[i]]$geneSet) + geneSetDes <- rbind(geneSetDes, all_sets[[i]]$geneSetDes) + geneSetDag <- rbind(geneSetDag, all_sets[[i]]$geneSetDag) + geneSetNet <- rbind(geneSetNet, all_sets[[i]]$geneSetNet) + databaseStandardId <- "multiomics" + } + } else { + geneSet <- all_sets$geneSet + geneSetDag <- all_sets$geneSetDag + geneSetNet <- all_sets$geneSetNet + databaseStandardId <- all_sets$standardId + } - geneSet <- enrichD$geneSet - geneSetDes <- enrichD$geneSetDes - geneSetDag <- enrichD$geneSetDag - geneSetNet <- enrichD$geneSetNet - databaseStandardId <- enrichD$standardId - rm(enrichD) + rm(all_sets) cat("Loading the ID lists...\n") interest_lists <- list() if (is.null(analyteLists)) { - for (i in analyteLists) { + for (i in seq_along(analyteListFiles)) { interestingGeneMap <- loadInterestGene( organism = organism, dataType = "list", inputGeneFile = analyteListFiles[i], inputGene = NULL, geneType = analyteTypes[i], collapseMethod = collapseMethod, cache = cache, @@ -86,8 +128,27 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, interest_lists[[i]] <- interestGeneList } } + } else { + for (i in seq_along(analyteLists)) { + interestingGeneMap <- loadInterestGene( + organism = organism, dataType = "list", inputGeneFile = analyteLists[i], inputGene = NULL, + geneType = analyteTypes[i], collapseMethod = collapseMethod, cache = cache, + hostName = hostName, geneSet = geneSet + ) + if (organism == "others") { + interestGeneList <- unique(interestingGeneMap) + interest_lists[[i]] <- interestGeneList + } else { + interestStandardId <- interestingGeneMap$standardId + interestGeneList <- unique(interestingGeneMap$mapped[[interestStandardId]]) + interest_lists[[i]] <- interestGeneList + } + } } + # Load Gene Sets + cat("Loading the reference lists...\n") + ## Meta-analysis } } diff --git a/R/WebGestaltROra.R b/R/WebGestaltROra.R index 2d44641..053bdd8 100644 --- a/R/WebGestaltROra.R +++ b/R/WebGestaltROra.R @@ -1,6 +1,12 @@ #' @importFrom readr write_tsv #' @importFrom dplyr left_join select arrange %>% desc mutate -WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, collapseMethod = "mean", referenceGeneFile = NULL, referenceGene = NULL, referenceGeneType = NULL, referenceSet = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10, reportNum = 20, setCoverNum = 10, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "binary", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10) { +WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, + enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, + collapseMethod = "mean", referenceGeneFile = NULL, referenceGene = NULL, referenceGeneType = NULL, + referenceSet = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, + topThr = 10, reportNum = 20, setCoverNum = 10, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, + dagColor = "binary", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, + useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10) { enrichMethod <- "ORA" projectDir <- file.path(outputDirectory, paste0("Project_", projectName)) @@ -19,7 +25,14 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD ################ Check parameter ################ errorTest <- parameterErrorMessage(enrichMethod = enrichMethod, organism = organism, collapseMethod = collapseMethod, minNum = minNum, maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr, reportNum = reportNum, isOutput = isOutput, outputDirectory = outputDirectory, dagColor = dagColor, hostName = hostName, cache = cache) - + if(!is.null(enrichDatabase)){ + if(enrichDatabase == "all") { + all_sets <- listGeneSet(organism = organism, hostName = hostName, cache = cache) + all_sets <- all_sets[all_sets$idType == "entrezgene",] + enrichDatabase <- all_sets$name + enrichDatabaseType <- all_sets$idType + } + } if (!is.null(errorTest)) { stop(errorTest) } diff --git a/R/idMapping.R b/R/idMapping.R index 0a28e6a..daf96c1 100644 --- a/R/idMapping.R +++ b/R/idMapping.R @@ -17,56 +17,54 @@ #' @export #' @aliases IDMapping #' -idMapping <- function(organism="hsapiens", dataType="list", inputGeneFile=NULL, inputGene=NULL, sourceIdType, targetIdType=NULL, collapseMethod="mean", mappingOutput=FALSE, outputFileName="", cache=NULL, hostName="https://www.webgestalt.org/") { - #############Check general parameters######## - errorTest <- parameterErrorMessage(organism=organism, dataType=dataType, collapseMethod=collapseMethod, hostName=hostName, mappingOutput=mappingOutput, cache=cache) +idMapping <- function(organism = "hsapiens", dataType = "list", inputGeneFile = NULL, inputGene = NULL, sourceIdType, targetIdType = NULL, collapseMethod = "mean", mappingOutput = FALSE, outputFileName = "", cache = NULL, hostName = "https://www.webgestalt.org/") { + ############# Check general parameters######## + errorTest <- parameterErrorMessage(organism = organism, dataType = dataType, collapseMethod = collapseMethod, hostName = hostName, mappingOutput = mappingOutput, cache = cache) - if(!is.null(errorTest)){ - stop(errorTest) - } + if (!is.null(errorTest)) { + stop(errorTest) + } - ############Check source id type######### - errorTest <- idTypeError(idType=sourceIdType, organism=organism, hostName=hostName, cache=cache) - if(!is.null(errorTest)){ - stop(errorTest) - } + ############ Check source id type######### + errorTest <- idTypeError(idType = sourceIdType, organism = organism, hostName = hostName, cache = cache) + if (!is.null(errorTest)) { + stop(errorTest) + } - ##########Identify the standardId for the input ID type########### - standardSource <- identifyStandardId(hostName=hostName, idType=sourceIdType, organism=organism, type="interest", cache=cache) + ########## Identify the standardId for the input ID type########### + standardSource <- identifyStandardId(hostName = hostName, idType = sourceIdType, organism = organism, type = "interest", cache = cache) - ############Check target id type######### - if(!is.null(targetIdType)){ - errorTest <- targetIdTypeError(idType=targetIdType, organism=organism, hostName=hostName, cache=cache) - if(!is.null(errorTest)){ - stop(errorTest) - }else{ - standardTarget <- identifyStandardId(hostName=hostName, idType=targetIdType, organism=organism, type="interest", cache=cache) - errorTest <- stardardDiffError(standardSource=standardSource,standardTarget=standardTarget) - if(!is.null(errorTest)){ - stop(errorTest) - } - } - }else{ - targetIdType <- standardSource - } + ############ Check target id type######### + if (!is.null(targetIdType)) { + errorTest <- targetIdTypeError(idType = targetIdType, organism = organism, hostName = hostName, cache = cache) + if (!is.null(errorTest)) { + stop(errorTest) + } else { + standardTarget <- identifyStandardId(hostName = hostName, idType = targetIdType, organism = organism, type = "interest", cache = cache) + errorTest <- stardardDiffError(standardSource = standardSource, standardTarget = standardTarget) + if (!is.null(errorTest)) { + stop(errorTest) + } + } + } else { + targetIdType <- standardSource + } - ##########gene level ID Mapping########## - if(standardSource=="entrezgene"){ - idMap <- idMappingGene(organism=organism, dataType=dataType, inputGeneFile=inputGeneFile, inputGene=inputGene, sourceIdType=sourceIdType, targetIdType=targetIdType, collapseMethod=collapseMethod, mappingOutput=mappingOutput, outputFileName=outputFileName, hostName=hostName) - } else if(standardSource == "rampc"){ - idMap <- idMappingMetabolites(organism=organism, dataType=dataType, inputGeneFile=inputGeneFile, inputGene=inputGene, sourceIdType=sourceIdType, targetIdType=targetIdType, standardId = standardSource, collapseMethod=collapseMethod, mappingOutput=mappingOutput, outputFileName=outputFileName, hostName=hostName) - } - else { - idMap <- idMappingPhosphosite(organism=organism, dataType=dataType, inputGeneFile=inputGeneFile, inputGene=inputGene, sourceIdType=sourceIdType, targetIdType=targetIdType, collapseMethod=collapseMethod, mappingOutput=mappingOutput, outputFileName=outputFileName, hostName=hostName) - } + ########## gene level ID Mapping########## + if (standardSource == "entrezgene") { + idMap <- idMappingGene(organism = organism, dataType = dataType, inputGeneFile = inputGeneFile, inputGene = inputGene, sourceIdType = sourceIdType, targetIdType = targetIdType, collapseMethod = collapseMethod, mappingOutput = mappingOutput, outputFileName = outputFileName, hostName = hostName) + } else if (standardSource == "rampc") { + idMap <- idMappingMetabolites(organism = organism, dataType = dataType, inputGeneFile = inputGeneFile, inputGene = inputGene, sourceIdType = sourceIdType, targetIdType = targetIdType, standardId = standardSource, collapseMethod = collapseMethod, mappingOutput = mappingOutput, outputFileName = outputFileName, hostName = hostName) + } else { + idMap <- idMappingPhosphosite(organism = organism, dataType = dataType, inputGeneFile = inputGeneFile, inputGene = inputGene, sourceIdType = sourceIdType, targetIdType = targetIdType, collapseMethod = collapseMethod, mappingOutput = mappingOutput, outputFileName = outputFileName, hostName = hostName) + } - - idMap$standardId <- standardSource - return(idMap) + idMap$standardId <- standardSource + return(idMap) } #' @export IDMapping <- function(...) { - warning("Function IDMapping is deprecated and changed to idMapping!\n") - return(idMapping(...)) + warning("Function IDMapping is deprecated and changed to idMapping!\n") + return(idMapping(...)) } diff --git a/R/multiOmicsUtils.R b/R/multiOmicsUtils.R new file mode 100644 index 0000000..73e2236 --- /dev/null +++ b/R/multiOmicsUtils.R @@ -0,0 +1,14 @@ +get_gmt_file <- function(hostName, id_type, db_name, organism, cache) { + std_id <- identifyStandardId(hostName, id_type, organism, "interest", cache) + if (is.null(std_id)) { + stop(idTypeError(id_type, organism, hostName, cache)) + } + geneSetInfo <- listGeneSet(organism = organism, hostName = hostName, cache = cache) + if (std_id == "rampc") { + db_name <- gsub("pathway_TAGMETABOLITE", "", db_name) + db_name <- gsub("pathway_", "", db_name) + return(geneSetInfo[geneSetInfo$idType == "rampc" & geneSetInfo$name == paste0("pathway_TAGMETABOLITE", db_name), 1][[1]]) + } else { + return(geneSetInfo[geneSetInfo$idType == std_id & geneSetInfo$name == db_name, 1][[1]]) + } +} diff --git a/README.md b/README.md index b4a0b67..39e93c5 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ WebGestaltR function can perform popular enrichment analyses: ORA (Over-Represen --- -#### Requirements +### Requirements - R (>= 4.0.0) - Rust (>= 1.63.0) diff --git a/build_hash.py b/build_hash.py index ee8239d..4c17e79 100644 --- a/build_hash.py +++ b/build_hash.py @@ -1,4 +1,3 @@ -import os import datetime base: str = """ From d96e16489c90358fc9cc09b4047d03318e965421 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 2 Nov 2023 13:52:30 -0500 Subject: [PATCH 52/82] update lib variable names --- src/rust/Cargo.lock | 17 +++++------------ src/rust/src/lib.rs | 6 +++--- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index da40a7f..253cf12 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -385,12 +385,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "ryu" version = "1.0.15" @@ -507,13 +501,12 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "webgestalt_lib" version = "0.1.0" -source = "git+https://github.com/bzhanglab/webgestalt_rust.git#963575b5054366a9c8730142b60a9dc53527ee19" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git#76c87be4ff7c39c863f6707bf5aeef8f80e56570" dependencies = [ "ahash", "csv", "rand", "rayon", - "rustc-hash", "serde", "statrs", ] @@ -530,18 +523,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.20" +version = "0.7.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd66a62464e3ffd4e37bd09950c2b9dd6c4f8767380fabba0d523f9a775bc85a" +checksum = "e50cbb27c30666a6108abd6bc7577556265b44f243e2be89a8bc4e07a528c107" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.20" +version = "0.7.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "255c4596d41e6916ced49cfafea18727b24d67878fa180ddfd69b9df34fd1726" +checksum = "a25f293fe55f0a48e7010d65552bb63704f6ceb55a1a385da10d41d8f78e4a3d" dependencies = [ "proc-macro2", "quote", diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 360cd84..3ff9df9 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -6,7 +6,7 @@ use ndarray::Array2; use webgestalt_lib::{ methods::gsea::{GSEAConfig, RankListItem}, methods::{ - gsea::FullGSEAResult, + gsea::GSEAResult, multiomics::{multiomic_gsea, multiomic_ora, GSEAJob, ORAJob}, ora::{get_ora, ORAConfig, ORAResult}, }, @@ -240,7 +240,7 @@ fn gsea_rust( rank: ranks_vec[i], }) } - let res = webgestalt_lib::methods::gsea::gsea(analyte_list, gmt, config, None); + let res: Vec = webgestalt_lib::methods::gsea::gsea(analyte_list, gmt, config, None); let mut fdr: Vec = Vec::new(); let mut p: Vec = Vec::new(); let mut leading_edge: Vec = Vec::new(); @@ -342,7 +342,7 @@ pub fn rust_multiomics_gsea( }; jobs.push(job); } - let res: Vec> = multiomic_gsea(jobs, method); + let res: Vec> = multiomic_gsea(jobs, method); let mut all_res: Vec = Vec::new(); for analysis in res { let mut fdr: Vec = Vec::new(); From 1736acf2f3a88ba9b7357398c303dbb52550d26b Mon Sep 17 00:00:00 2001 From: John Elizarraras <13990267+iblacksand@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:24:33 -0500 Subject: [PATCH 53/82] Create windows-compile.yml --- .github/workflow/windows-compile.yml | 61 ++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 .github/workflow/windows-compile.yml diff --git a/.github/workflow/windows-compile.yml b/.github/workflow/windows-compile.yml new file mode 100644 index 0000000..f0514e0 --- /dev/null +++ b/.github/workflow/windows-compile.yml @@ -0,0 +1,61 @@ +on: + push: + tags: + - 'windows*' + +name: Build Windows + +jobs: + build: + strategy: + matrix: + target: + - x86_64 + - i686 + + name: build-${{ matrix.target }}-pc-windows-msvc + + runs-on: windows-latest + + steps: + - name: Checkout sources + uses: actions/checkout@v2 + + - name: Install stable toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + target: ${{ matrix.target }}-pc-windows-msvc + profile: minimal + default: true + + - name: Run cargo build + uses: actions-rs/cargo@v1 + with: + command: build + args: --release --target=${{ matrix.target }}-pc-windows-msv --manifest-path=src/rust/Cargo.toml + + - name: List files + run: ls ./src/rust/target/${{ matrix.target }}-pc-windows-msvc/release/ + shell: bash + + - name: Create Release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ github.ref }}-${{ matrix.target }} + release_name: Release ${{ github.ref }}-${{ matrix.target }} + draft: false + prerelease: true + - name: Upload Release Asset + id: upload-release-asset + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ./src/rust/target/${{ matrix.target }}-pc-windows-msvc/release/libstring2path.a + asset_name: libWebGestaltR.a + asset_content_type: application/octet-stream From 7e0af6a228a77b3b1d0da5ebe4c1434b9bbf143d Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 2 Nov 2023 16:46:23 -0500 Subject: [PATCH 54/82] add documentation and fix ORA reading GMT was being combined for ORA, but that should only happen for non-meta-analysis GSEA --- R/WebGestaltR.R | 3 +- R/WebGestaltRMultiOmics.R | 163 +++++++++++++++++++++++++++++++++----- 2 files changed, 145 insertions(+), 21 deletions(-) diff --git a/R/WebGestaltR.R b/R/WebGestaltR.R index cc6c887..9f2b924 100644 --- a/R/WebGestaltR.R +++ b/R/WebGestaltR.R @@ -12,8 +12,7 @@ #' categories have DAG (directed acyclic graph) structure or genes in the functional #' categories have network structure, those relationship can also be visualized in the #' report. -#' -#' @param omic_type The type of omics analysis: \code{single} or \code{multi} +#' #' @param enrichMethod Enrichment methods: \code{ORA}, \code{GSEA} or \code{NTA}. #' @param organism Currently, WebGestaltR supports 12 organisms. Users can use the function #' \code{listOrganism} to check available organisms. Users can also input \code{others} to diff --git a/R/WebGestaltRMultiOmics.R b/R/WebGestaltRMultiOmics.R index e196770..18e43dc 100644 --- a/R/WebGestaltRMultiOmics.R +++ b/R/WebGestaltRMultiOmics.R @@ -1,23 +1,124 @@ #' @title WebGestaltRMultiOmics #' @description Perform multi-omics analysis using WebGestaltR -#' @param analyteLists A list of analyte lists -#' @param analyteListFiles A list of analyte list files -#' @param analyteTypes A list of analyte types -#' @param enrichMethod Enrichment method, either \code{ORA} or \code{GSEA} -#' @param organism The organism to use -#' @param enrichDatabase The database to use -#' @param enrichDatabaseFile The database file to use -#' @param enrichDatabaseType The database type to use -#' @param enrichDatabaseDescriptionFile The database description file to use +#' @param enrichMethod Enrichment methods: \code{ORA}or \code{GSEA}. +#' @param organism Currently, WebGestaltR supports 12 organisms. Users can use the function +#' \code{listOrganism} to check available organisms. Users can also input \code{others} to +#' perform the enrichment analysis for other organisms not supported by WebGestaltR. For +#' other organisms, users need to provide the functional categories, interesting list and +#' reference list (for ORA method). Because WebGestaltR does not perform the ID mapping for +#' the other organisms, the above data should have the same ID type. +#' @param enrichDatabase The functional categories for the enrichment analysis. Users can use +#' the function \code{listGeneSet} to check the available functional databases for the +#' selected organism. Multiple databases in a vector are supported for ORA and GSEA. +#' @param enrichDatabaseFile Users can provide one or more GMT files as the functional +#' category for enrichment analysis. The extension of the file should be \code{gmt} and the +#' first column of the file is the category ID, the second one is the external link for the +#' category. Genes annotated to the category are from the third column. All columns are +#' separated by tabs. The GMT files will be combined with \code{enrichDatabase}. +#' @param enrichDatabaseType The ID type of the genes in the \code{enrichDatabaseFile}. +#' If users set \code{organism} as \code{others}, users do not need to set this ID type because +#' WebGestaltR will not perform ID mapping for other organisms. The supported ID types of +#' WebGestaltR for the selected organism can be found by the function \code{listIdType}. +#' @param enrichDatabaseDescriptionFile Users can also provide description files for the custom +#' \code{enrichDatabaseFile}. The extension of the description file should be \code{des}. The +#' description file contains two columns: the first column is the category ID that should be +#' exactly the same as the category ID in the custom \code{enrichDatabaseFile} and the second +#' column is the description of the category. All columns are separated by tabs. +#' @param analyteListFiles If \code{enrichMethod} is \code{ORA}, the extension of +#' the \code{analyteListFiles} should be \code{txt} and each file can only contain one column: +#' the interesting analyte list. If \code{enrichMethod} is \code{GSEA}, the extension of the +#' \code{analyteListFiles} should be \code{rnk} and the files should contain two columns +#' separated by tab: the analyte list and the corresponding scores. +#' @param analyteLists Users can also use an R object as the input. If \code{enrichMethod} is +#' \code{ORA}, \code{analyte} should be an R \code{vector} contain multiple R \code{vector} object +#' containing the interesting analyte lists. If \code{enrichMethod} is \code{GSEA}, +#' \code{analyteLists} should be an \code{vector} of R \code{data.frame} objects containing two columns: the +#' gene list and the corresponding scores. +#' @param analyteLists \code{vector} of the ID type of the corresponding interesting analyte list. The supported ID types of +#' WebGestaltR for the selected organism can be found by the function \code{listIdType}. If +#' the \code{organism} is \code{others}, users do not need to set this parameter. The length of \code{analyteLists} should be +#' the same as the length of \code{analyteListFiles} or \code{analyteLists}. +#' @param collapseMethod The method to collapse duplicate IDs with scores. \code{mean}, +#' \code{median}, \code{min} and \code{max} represent the mean, median, minimum and maximum +#' of scores for the duplicate IDs. +#' @param referenceListFiles For the ORA method, the users need to upload the reference gene +#' list. The extension of the \code{referenceListFile} should be \code{txt} and the file can +#' only contain one column: the reference gene list. +#' @param referenceLists For the ORA method, users can also use an R object as the reference +#' gene list. \code{referenceLists} should be an R \code{vector} object containing the +#' reference gene list. +#' @param referenceGeneType The ID type of the reference gene list. The supported ID types +#' of WebGestaltR for the selected organism can be found by the function \code{listIdType}. +#' If the \code{organism} is \code{others}, users do not need to set this parameter. +#' @param minNum WebGestaltR will exclude the categories with the number of annotated genes +#' less than \code{minNum} for enrichment analysis. The default is \code{10}. +#' @param maxNum WebGestaltR will exclude the categories with the number of annotated genes +#' larger than \code{maxNum} for enrichment analysis. The default is \code{500}. +#' @param sigMethod Two methods of significance are available in WebGestaltR: \code{fdr} and +#' \code{top}. \code{fdr} means the enriched categories are identified based on the FDR and +#' \code{top} means all categories are ranked based on FDR and then select top categories +#' as the enriched categories. The default is \code{fdr}. +#' @param fdrMethod For the ORA method, WebGestaltR supports five FDR methods: \code{holm}, +#' \code{hochberg}, \code{hommel}, \code{bonferroni}, \code{BH} and \code{BY}. The default +#' is \code{BH}. +#' @param fdrThr The significant threshold for the \code{fdr} method. The default is \code{0.05}. +#' @param topThr The threshold for the \code{top} method. The default is \code{10}. +#' @param reportNum The number of enriched categories visualized in the final report. The default +#' is \code{20}. A larger \code{reportNum} may be slow to render in the report. +#' @param perNum The number of permutations for the GSEA method. The default is \code{1000}. +#' @param gseaP The exponential scaling factor of the phenotype score. The default is \code{1}. +#' When p=0, ES reduces to standard K-S statistics (See original paper for more details). +#' @param isOutput If \code{isOutput} is TRUE, WebGestaltR will create a folder named by +#' the \code{projectName} and save the results in the folder. Otherwise, WebGestaltR will +#' only return an R \code{data.frame} object containing the enrichment results. If +#' hundreds of gene list need to be analyzed simultaneously, it is better to set +#' \code{isOutput} to \code{FALSE}. The default is \code{TRUE}. +#' @param outputDirectory The output directory for the results. +#' @param projectName The name of the project. If \code{projectName} is \code{NULL}, +#' WebGestaltR will use time stamp as the project name. +#' @param dagColor If \code{dagColor} is \code{binary}, the significant terms in the DAG +#' structure will be colored by steel blue for ORA method or steel blue (positive related) +#' and dark orange (negative related) for GSEA method. If \code{dagColor} is \code{continous}, +#' the significant terms in the DAG structure will be colored by the color gradient based on +#' corresponding FDRs. +#' @param saveRawGseaResult Whether the raw result from GSEA is saved as a RDS file, which can be +#' used for plotting. Defaults to \code{FALSE}. The list includes +#' \describe{ +#' \item{Enrichment_Results}{A data frame of GSEA results with statistics} +#' \item{Running_Sums}{A matrix of running sum of scores for each gene set} +#' \item{Items_in_Set}{A list with ranks of genes for each gene set} +#' } +#' @param gseaPlotFormat The graphic format of GSEA enrichment plots. Either \code{svg}, +#' \code{png}, or \code{c("png", "svg")} (default). +#' @param setCoverNum The number of expected gene sets after set cover to reduce redundancy. +#' It could get fewer sets if the coverage reaches 100\%. The default is \code{10}. +#' @param networkConstructionMethod Netowrk construction method for NTA. Either +#' \code{Network_Retrieval_Prioritization} or \code{Network_Expansion}. Network Retrieval & +#' Prioritization first uses random walk analysis to calculate random walk probabilities +#' for the input seeds, then identifies the relationships among the seeds in the selected +#' network and returns a retrieval sub-network. The seeds with the top random walk +#' probabilities are highlighted in the sub-network. Network Expansion first uses random +#' walk analysis to rank all genes in the selected network based on their network +#' proximity to the input seeds and then return an expanded sub-network in which nodes +#' are the input seeds and their top ranking neighbors and edges represent their +#' relationships. +#' @param neighborNum The number of neighbors to include in NTA Network Expansion method. +#' @param highlightType The type of nodes to highlight in the NTA Network Expansion method, +#' either \code{Seeds} or \code{Neighbors}. +#' @param highlightSeedNum The number of top input seeds to highlight in NTA Network Retrieval +#' & Prioritizaiton method. +#' @param nThreads The number of cores to use for GSEA and set cover, and in batch function. +#' @param cache A directory to save data cache for reuse. Defaults to \code{NULL} and disabled. +#' @param hostName The server URL for accessing data. Mostly for development purposes. #' @export -WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, analyteTypes = NULL, enrichMethod = "ORA", organism = "hsapiens", enrichDatabase = NULL, - enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, +WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, analyteTypes = NULL, enrichMethod = "ORA", organism = "hsapiens", + enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, collapseMethod = "mean", minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10, reportNum = 20, setCoverNum = 10, perNum = 1000, p = 1, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "binary", saveRawGseaResult = FALSE, plotFormat = "png", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10, isMetaAnalysis = TRUE, mergeMethod = "mean", normalizationMethod = "rank", - referenceLists = NULL, referenceTypes = NULL) { + referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL) { VALID_MERGE_METHODS <- c("mean", "max") VALID_NORM_METHODS <- c("rank", "median", "mean") VALID_ENRICH_METHODS <- c("ORA", "GSEA") @@ -28,6 +129,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, analyteTypes <- testNull(analyteTypes) referenceLists <- testNull(referenceLists) referenceTypes <- testNull(referenceTypes) + referenceListFiles <- testNull(referenceListFiles) enrichMethod <- testNull(enrichMethod) organism <- testNull(organism) enrichDatabase <- testNull(enrichDatabase) @@ -73,15 +175,9 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, if (length(unique(analyteTypes)) == 1) { databases <- enrichDatabase } else { - types_processed <- c() for (i in seq_along(analyteTypes)) { - if (analyteTypes[i] %in% types_processed) { - next - } databases <- c(databases, get_gmt_file(hostName, analyteTypes[i], enrichDatabase[i], organism, cache)) - types_processed <- c(types_processed, analyteTypes[i]) } - databases <- unique(databases) } } else { databases <- NULL @@ -131,7 +227,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, } else { for (i in seq_along(analyteLists)) { interestingGeneMap <- loadInterestGene( - organism = organism, dataType = "list", inputGeneFile = analyteLists[i], inputGene = NULL, + organism = organism, dataType = "list", inputGeneFile = NULL, inputGene = analyteLists[i], geneType = analyteTypes[i], collapseMethod = collapseMethod, cache = cache, hostName = hostName, geneSet = geneSet ) @@ -149,6 +245,35 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, # Load Gene Sets cat("Loading the reference lists...\n") + referenceGeneList <- loadReferenceGene(organism = organism, referenceGeneFile = referenceGeneFile, referenceGene = referenceGene, referenceGeneType = referenceGeneType, referenceSet = referenceSet, collapseMethod = collapseMethod, hostName = hostName, geneSet = geneSet, interestGeneList = interestGeneList, cache = cache) + ## Meta-analysis } } + + +load_combined_gmt <- function(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) { + databases <- c() + if (!is.null(enrichDatabase)) { # Need to get correct name for metabolite databases + if (length(unique(analyteTypes)) == 1) { + databases <- enrichDatabase + } else { + types_processed <- c() + for (i in seq_along(analyteTypes)) { + if (analyteTypes[i] %in% types_processed) { + next + } + databases <- c(databases, get_gmt_file(hostName, analyteTypes[i], enrichDatabase[i], organism, cache)) + types_processed <- c(types_processed, analyteTypes[i]) + } + databases <- unique(databases) + } + } else { + databases <- NULL + } + all_sets <- loadGeneSet( + organism = organism, enrichDatabase = databases, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, + enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, cache = cache, hostName = hostName + ) + return(all_sets) +} From ce0fe2e9979937f3b5e61ae7237dd358ee7039ba Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Fri, 3 Nov 2023 14:32:08 -0500 Subject: [PATCH 55/82] add more ora support and start attempt at windows precompiled --- R/WebGestaltRMultiOmics.R | 78 +++++++++++++++++++++++++++++---------- R/WebGestaltROra.R | 4 +- R/multiOraEnrichment.R | 17 +++++++++ src/Makevars.win | 25 +++++++------ 4 files changed, 91 insertions(+), 33 deletions(-) create mode 100644 R/multiOraEnrichment.R diff --git a/R/WebGestaltRMultiOmics.R b/R/WebGestaltRMultiOmics.R index 18e43dc..51fac47 100644 --- a/R/WebGestaltRMultiOmics.R +++ b/R/WebGestaltRMultiOmics.R @@ -118,7 +118,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, projectName = NULL, dagColor = "binary", saveRawGseaResult = FALSE, plotFormat = "png", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10, isMetaAnalysis = TRUE, mergeMethod = "mean", normalizationMethod = "rank", - referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL) { + referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL, referenceSet) { VALID_MERGE_METHODS <- c("mean", "max") VALID_NORM_METHODS <- c("rank", "median", "mean") VALID_ENRICH_METHODS <- c("ORA", "GSEA") @@ -170,22 +170,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, if (enrichMethod == "ORA") { cat("Performing multi-omics ORA\nLoading the functional categories...\n") - databases <- c() - if (!is.null(enrichDatabase)) { # Need to get correct name for metabolite databases - if (length(unique(analyteTypes)) == 1) { - databases <- enrichDatabase - } else { - for (i in seq_along(analyteTypes)) { - databases <- c(databases, get_gmt_file(hostName, analyteTypes[i], enrichDatabase[i], organism, cache)) - } - } - } else { - databases <- NULL - } - all_sets <- loadGeneSet( - organism = organism, enrichDatabase = databases, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, - enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, cache = cache, hostName = hostName - ) + all_sets <- .load_meta_gmt(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) if (length(all_sets) > 1) { geneSet <- all_sets[[1]]$geneSet geneSetDes <- all_sets[[1]]$geneSetDes @@ -203,7 +188,6 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, geneSetNet <- all_sets$geneSetNet databaseStandardId <- all_sets$standardId } - rm(all_sets) cat("Loading the ID lists...\n") @@ -245,14 +229,48 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, # Load Gene Sets cat("Loading the reference lists...\n") - referenceGeneList <- loadReferenceGene(organism = organism, referenceGeneFile = referenceGeneFile, referenceGene = referenceGene, referenceGeneType = referenceGeneType, referenceSet = referenceSet, collapseMethod = collapseMethod, hostName = hostName, geneSet = geneSet, interestGeneList = interestGeneList, cache = cache) + referenceGeneList <- loadReferenceGene( + organism = organism, referenceGeneFile = referenceListFiles, + referenceGene = referenceListFiles, referenceGeneType = referenceTypes, + referenceSet = referenceSet, collapseMethod = collapseMethod, + hostName = hostName, geneSet = geneSet, interestGeneList = interestGeneList, + cache = cache + ) + + oraRes <- multiOraEnrichment(interestGeneList, referenceGeneList, geneSet, minNum = minNum, + maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, + fdrThr = fdrThr, topThr = topThr) + ## Meta-analysis + } else if (enrichMethod == "GSEA") { + if (isMetaAnalysis) { + + } else { + all_sets <- .load_combined_gmt(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) + if (length(all_sets) > 1) { + geneSet <- all_sets[[1]]$geneSet + geneSetDes <- all_sets[[1]]$geneSetDes + geneSetNet <- all_sets[[1]]$geneSetNet + for (i in 2:length(all_sets)) { + geneSet <- rbind(geneSet, all_sets[[i]]$geneSet) + geneSetDes <- rbind(geneSetDes, all_sets[[i]]$geneSetDes) + geneSetDag <- rbind(geneSetDag, all_sets[[i]]$geneSetDag) + geneSetNet <- rbind(geneSetNet, all_sets[[i]]$geneSetNet) + databaseStandardId <- "multiomics" + } + } else { + geneSet <- all_sets$geneSet + geneSetDag <- all_sets$geneSetDag + geneSetNet <- all_sets$geneSetNet + databaseStandardId <- all_sets$standardId + } + } } } -load_combined_gmt <- function(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) { +.load_combined_gmt <- function(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) { databases <- c() if (!is.null(enrichDatabase)) { # Need to get correct name for metabolite databases if (length(unique(analyteTypes)) == 1) { @@ -277,3 +295,23 @@ load_combined_gmt <- function(enrichDatabase, enrichDatabaseFile, enrichDatabase ) return(all_sets) } + +.load_meta_gmt <- function(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) { + databases <- c() + if (!is.null(enrichDatabase)) { # Need to get correct name for metabolite databases + if (length(unique(analyteTypes)) == 1) { + databases <- enrichDatabase + } else { + for (i in seq_along(analyteTypes)) { + databases <- c(databases, get_gmt_file(hostName, analyteTypes[i], enrichDatabase[i], organism, cache)) + } + } + } else { + databases <- NULL + } + all_sets <- loadGeneSet( + organism = organism, enrichDatabase = databases, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, + enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, cache = cache, hostName = hostName + ) + return(all_sets) +} diff --git a/R/WebGestaltROra.R b/R/WebGestaltROra.R index 053bdd8..aa5245c 100644 --- a/R/WebGestaltROra.R +++ b/R/WebGestaltROra.R @@ -4,8 +4,8 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, collapseMethod = "mean", referenceGeneFile = NULL, referenceGene = NULL, referenceGeneType = NULL, referenceSet = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, - topThr = 10, reportNum = 20, setCoverNum = 10, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, - dagColor = "binary", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, + topThr = 10, reportNum = 20, setCoverNum = 10, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, + dagColor = "binary", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10) { enrichMethod <- "ORA" projectDir <- file.path(outputDirectory, paste0("Project_", projectName)) diff --git a/R/multiOraEnrichment.R b/R/multiOraEnrichment.R new file mode 100644 index 0000000..a34b583 --- /dev/null +++ b/R/multiOraEnrichment.R @@ -0,0 +1,17 @@ +multiOraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10) { + for (i in seq_along(referenceGene)) { + referenceGene[[i]] <- intersect(referenceGene[[i]], geneSet[[i]]$gene) + geneSet[[i]] <- filter(geneSet[[i]], .data$gene %in% referenceGene[[i]]) + } + geneSetNum <- lapply(geneSet, function(x) { + tapply(x$gene, x$geneSet, length) + }) + + geneSetNum <- lapply(geneSetNum, function(x) { + x[x >= minNum & x <= maxNum] + }) + + for (i in seq_along(interestGene)) { + interestGene[[i]] <- intersect(referenceGene[[i]], intersect(interestGene[[i]], geneSet[[i]]$gene)) + } +} diff --git a/src/Makevars.win b/src/Makevars.win index 717c761..deffae7 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,4 +1,5 @@ TARGET = $(subst 64,x86_64,$(subst 32,i686,$(WIN)))-pc-windows-gnu +CRATE = WebGestaltR TARGET_DIR = ./rust/target LIBDIR = $(TARGET_DIR)/$(TARGET)/release @@ -11,27 +12,29 @@ $(SHLIB): $(STATLIB) CARGOTMP = $(CURDIR)/.cargo +# See https://yutani.rbind.io/post/some-more-notes-about-using-rust-code-in-r-packages/#precompiled-binary-for-windows $(STATLIB): mkdir -p $(TARGET_DIR)/libgcc_mock + curl -L -o $(STATLIB) https://github.com/iblacksand/$(CRATE)/releases/latest/download/latest/lib$(CRATE).a # `rustc` adds `-lgcc_eh` flags to the compiler, but Rtools' GCC doesn't have # `libgcc_eh` due to the compilation settings. So, in order to please the # compiler, we need to add empty `libgcc_eh` to the library search paths. # # For more details, please refer to # https://github.com/r-windows/rtools-packages/blob/2407b23f1e0925bbb20a4162c963600105236318/mingw-w64-gcc/PKGBUILD#L313-L316 - touch $(TARGET_DIR)/libgcc_mock/libgcc_eh.a + # touch $(TARGET_DIR)/libgcc_mock/libgcc_eh.a # CARGO_LINKER is provided in Makevars.ucrt for R >= 4.2 - if [ "$(NOT_CRAN)" != "true" ]; then \ - export CARGO_HOME=$(CARGOTMP); \ - fi && \ - export CARGO_TARGET_X86_64_PC_WINDOWS_GNU_LINKER="$(CARGO_LINKER)" && \ - export LIBRARY_PATH="$${LIBRARY_PATH};$(CURDIR)/$(TARGET_DIR)/libgcc_mock" && \ - cargo build --target=$(TARGET) --lib --release --manifest-path=./rust/Cargo.toml --target-dir $(TARGET_DIR) - if [ "$(NOT_CRAN)" != "true" ]; then \ - rm -Rf $(CARGOTMP) && \ - rm -Rf $(LIBDIR)/build; \ - fi + # if [ "$(NOT_CRAN)" != "true" ]; then \ + # export CARGO_HOME=$(CARGOTMP); \ + # fi && \ + # export CARGO_TARGET_X86_64_PC_WINDOWS_GNU_LINKER="$(CARGO_LINKER)" && \ + # export LIBRARY_PATH="$${LIBRARY_PATH};$(CURDIR)/$(TARGET_DIR)/libgcc_mock" && \ + # cargo build --target=$(TARGET) --lib --release --manifest-path=./rust/Cargo.toml --target-dir $(TARGET_DIR) + # if [ "$(NOT_CRAN)" != "true" ]; then \ + # rm -Rf $(CARGOTMP) && \ + # rm -Rf $(LIBDIR)/build; \ + # fi C_clean: rm -Rf $(SHLIB) $(STATLIB) $(OBJECTS) From 51a23fe0085aa8774a09c481bf2f3082a9b7b91e Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Fri, 3 Nov 2023 14:32:44 -0500 Subject: [PATCH 56/82] update to use faster rust --- src/rust/Cargo.lock | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 253cf12..d8241fc 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -501,7 +501,7 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "webgestalt_lib" version = "0.1.0" -source = "git+https://github.com/bzhanglab/webgestalt_rust.git#76c87be4ff7c39c863f6707bf5aeef8f80e56570" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git#c439dde641285355f1023797fd439ce3bad98cad" dependencies = [ "ahash", "csv", @@ -523,18 +523,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.23" +version = "0.7.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e50cbb27c30666a6108abd6bc7577556265b44f243e2be89a8bc4e07a528c107" +checksum = "092cd76b01a033a9965b9097da258689d9e17c69ded5dcf41bca001dd20ebc6d" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.23" +version = "0.7.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a25f293fe55f0a48e7010d65552bb63704f6ceb55a1a385da10d41d8f78e4a3d" +checksum = "a13a20a7c6a90e2034bcc65495799da92efcec6a8dd4f3fcb6f7a48988637ead" dependencies = [ "proc-macro2", "quote", From 7efc125ba06e7c48d005136a3f90a956f0a06335 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Fri, 3 Nov 2023 14:36:22 -0500 Subject: [PATCH 57/82] fix folder --- .github/{workflow => workflows}/windows-compile.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{workflow => workflows}/windows-compile.yml (100%) diff --git a/.github/workflow/windows-compile.yml b/.github/workflows/windows-compile.yml similarity index 100% rename from .github/workflow/windows-compile.yml rename to .github/workflows/windows-compile.yml From 1d5ac8e84283b08995b8fadba68ec8bfd4147923 Mon Sep 17 00:00:00 2001 From: John Elizarraras <13990267+iblacksand@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:38:57 -0500 Subject: [PATCH 58/82] Fix typo --- .github/workflows/windows-compile.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/windows-compile.yml b/.github/workflows/windows-compile.yml index f0514e0..a5a3f6b 100644 --- a/.github/workflows/windows-compile.yml +++ b/.github/workflows/windows-compile.yml @@ -33,7 +33,7 @@ jobs: uses: actions-rs/cargo@v1 with: command: build - args: --release --target=${{ matrix.target }}-pc-windows-msv --manifest-path=src/rust/Cargo.toml + args: --release --target=${{ matrix.target }}-pc-windows-msvc --manifest-path=src/rust/Cargo.toml - name: List files run: ls ./src/rust/target/${{ matrix.target }}-pc-windows-msvc/release/ From 2d22c3b4f0ce4b5893bf92582f16a782ac637750 Mon Sep 17 00:00:00 2001 From: John Elizarraras <13990267+iblacksand@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:46:30 -0500 Subject: [PATCH 59/82] Update windows-compile.yml --- .github/workflows/windows-compile.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/windows-compile.yml b/.github/workflows/windows-compile.yml index a5a3f6b..660d123 100644 --- a/.github/workflows/windows-compile.yml +++ b/.github/workflows/windows-compile.yml @@ -56,6 +56,6 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: ./src/rust/target/${{ matrix.target }}-pc-windows-msvc/release/libstring2path.a - asset_name: libWebGestaltR.a + asset_path: ./src/rust/target/${{ matrix.target }}-pc-windows-msvc/release/libWebGestatltR.lib + asset_name: libWebGestaltR.lib asset_content_type: application/octet-stream From 9c5e78572fa57c5305da5ca395654655352e5a61 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Fri, 3 Nov 2023 14:51:53 -0500 Subject: [PATCH 60/82] fix windows compilation paths --- .github/workflows/windows-compile.yml | 6 +++--- src/Makevars.win | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/windows-compile.yml b/.github/workflows/windows-compile.yml index 660d123..dacbd91 100644 --- a/.github/workflows/windows-compile.yml +++ b/.github/workflows/windows-compile.yml @@ -1,7 +1,7 @@ on: push: tags: - - 'windows*' + - "windows*" name: Build Windows @@ -56,6 +56,6 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: ./src/rust/target/${{ matrix.target }}-pc-windows-msvc/release/libWebGestatltR.lib - asset_name: libWebGestaltR.lib + asset_path: ./src/rust/target/${{ matrix.target }}-pc-windows-msvc/release/WebGestaltR.lib + asset_name: WebGestaltR.lib asset_content_type: application/octet-stream diff --git a/src/Makevars.win b/src/Makevars.win index deffae7..a0ea523 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -15,7 +15,7 @@ CARGOTMP = $(CURDIR)/.cargo # See https://yutani.rbind.io/post/some-more-notes-about-using-rust-code-in-r-packages/#precompiled-binary-for-windows $(STATLIB): mkdir -p $(TARGET_DIR)/libgcc_mock - curl -L -o $(STATLIB) https://github.com/iblacksand/$(CRATE)/releases/latest/download/latest/lib$(CRATE).a + curl -L -o $(STATLIB) https://github.com/iblacksand/$(CRATE)/releases/latest/download/latest/$(CRATE).lib # `rustc` adds `-lgcc_eh` flags to the compiler, but Rtools' GCC doesn't have # `libgcc_eh` due to the compilation settings. So, in order to please the # compiler, we need to add empty `libgcc_eh` to the library search paths. From 1bd7e638222b960613483d5f61a4f6198a8f8762 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 6 Nov 2023 13:07:13 -0600 Subject: [PATCH 61/82] add updated link --- R/multiOraEnrichment.R | 10 ++++++++++ src/Makevars.win | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/R/multiOraEnrichment.R b/R/multiOraEnrichment.R index a34b583..87e3df2 100644 --- a/R/multiOraEnrichment.R +++ b/R/multiOraEnrichment.R @@ -13,5 +13,15 @@ multiOraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10 for (i in seq_along(interestGene)) { interestGene[[i]] <- intersect(referenceGene[[i]], intersect(interestGene[[i]], geneSet[[i]]$gene)) + if (length(interestGene[[i]]) == 0) { + stop(paste0("ERROR: No genes in the interesting list at index ", i, " can annote to any functional category.")) + } + } + + refG <- list() + intG <- list() + for (i in seq_along(geneSetNum)) { + refG[[i]] <- data.frame(geneSet = names(geneSetNum[[i]]), size = as.numeric(geneSetNum[[i]]), stringsAsFactors = FALSE) + intG[[i]] <- filter(geneSet[[i]], .data$gene %in% interestGene) } } diff --git a/src/Makevars.win b/src/Makevars.win index a0ea523..f57a66c 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -15,7 +15,7 @@ CARGOTMP = $(CURDIR)/.cargo # See https://yutani.rbind.io/post/some-more-notes-about-using-rust-code-in-r-packages/#precompiled-binary-for-windows $(STATLIB): mkdir -p $(TARGET_DIR)/libgcc_mock - curl -L -o $(STATLIB) https://github.com/iblacksand/$(CRATE)/releases/latest/download/latest/$(CRATE).lib + curl -L -o $(STATLIB) https://github.com/iblacksand/WebGestaltR/releases/download/windowsv0.0.1a-x86_64/WebGestaltR.lib # `rustc` adds `-lgcc_eh` flags to the compiler, but Rtools' GCC doesn't have # `libgcc_eh` due to the compilation settings. So, in order to please the # compiler, we need to add empty `libgcc_eh` to the library search paths. From 1cc0740cd436ae5fbf21894a0372d2d5a6f3f77c Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 6 Nov 2023 14:21:31 -0600 Subject: [PATCH 62/82] use gnu build for rust compilation --- .github/workflows/windows-compile.yml | 12 ++++++------ DESCRIPTION | 2 +- src/Makevars.win | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/windows-compile.yml b/.github/workflows/windows-compile.yml index dacbd91..82d3c0d 100644 --- a/.github/workflows/windows-compile.yml +++ b/.github/workflows/windows-compile.yml @@ -13,7 +13,7 @@ jobs: - x86_64 - i686 - name: build-${{ matrix.target }}-pc-windows-msvc + name: build-${{ matrix.target }}-pc-windows-gnu runs-on: windows-latest @@ -25,7 +25,7 @@ jobs: uses: actions-rs/toolchain@v1 with: toolchain: stable - target: ${{ matrix.target }}-pc-windows-msvc + target: ${{ matrix.target }}-pc-windows-gnu profile: minimal default: true @@ -33,10 +33,10 @@ jobs: uses: actions-rs/cargo@v1 with: command: build - args: --release --target=${{ matrix.target }}-pc-windows-msvc --manifest-path=src/rust/Cargo.toml + args: --release --target=${{ matrix.target }}-pc-windows-gnu --manifest-path=src/rust/Cargo.toml - name: List files - run: ls ./src/rust/target/${{ matrix.target }}-pc-windows-msvc/release/ + run: ls ./src/rust/target/${{ matrix.target }}-pc-windows-gnu/release/ shell: bash - name: Create Release @@ -56,6 +56,6 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: ./src/rust/target/${{ matrix.target }}-pc-windows-msvc/release/WebGestaltR.lib - asset_name: WebGestaltR.lib + asset_path: ./src/rust/target/${{ matrix.target }}-pc-windows-gnu/release/libWebGestaltR.a + asset_name: libWebGestaltR.a asset_content_type: application/octet-stream diff --git a/DESCRIPTION b/DESCRIPTION index db38548..817545f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,7 +15,7 @@ Description: The web version WebGestalt supports 12 License: LGPL URL: https://github.com/bzhanglab/WebGestaltR LazyLoad: yes -Depends: R (>= 3.3) +Depends: R (>= 4.0) Imports: methods, dplyr, doRNG, readr, parallel (>= 3.3.2), doParallel (>= 1.0.10), foreach (>= 1.4.0), jsonlite, httr, rlang, svglite, diff --git a/src/Makevars.win b/src/Makevars.win index f57a66c..d416daa 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,10 +1,10 @@ TARGET = $(subst 64,x86_64,$(subst 32,i686,$(WIN)))-pc-windows-gnu CRATE = WebGestaltR - +BASE_TAG = windowsv0.0.1c TARGET_DIR = ./rust/target LIBDIR = $(TARGET_DIR)/$(TARGET)/release -STATLIB = $(LIBDIR)/libWebGestaltR.a -PKG_LIBS = -L$(LIBDIR) -lWebGestaltR -lws2_32 -ladvapi32 -luserenv -lbcrypt -lntdll +STATLIB = $(LIBDIR)/lib$(CRATE).a +PKG_LIBS = -L$(LIBDIR) -l$(CRATE) -lws2_32 -ladvapi32 -luserenv -lbcrypt -lntdll all: C_clean @@ -15,7 +15,7 @@ CARGOTMP = $(CURDIR)/.cargo # See https://yutani.rbind.io/post/some-more-notes-about-using-rust-code-in-r-packages/#precompiled-binary-for-windows $(STATLIB): mkdir -p $(TARGET_DIR)/libgcc_mock - curl -L -o $(STATLIB) https://github.com/iblacksand/WebGestaltR/releases/download/windowsv0.0.1a-x86_64/WebGestaltR.lib + curl -L -o $(STATLIB) https://github.com/iblacksand/$(CRATE)/releases/download/$(BASE_TAG)-$(TARGET)/lib$(CRATE).a # `rustc` adds `-lgcc_eh` flags to the compiler, but Rtools' GCC doesn't have # `libgcc_eh` due to the compilation settings. So, in order to please the # compiler, we need to add empty `libgcc_eh` to the library search paths. From fb8505bcba91856b1deb584e5293159b19dd49bc Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 6 Nov 2023 15:07:52 -0600 Subject: [PATCH 63/82] add r install --- .github/workflows/windows-compile.yml | 7 +++++++ src/Makevars.win | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/windows-compile.yml b/.github/workflows/windows-compile.yml index 82d3c0d..4c4ce58 100644 --- a/.github/workflows/windows-compile.yml +++ b/.github/workflows/windows-compile.yml @@ -29,6 +29,13 @@ jobs: profile: minimal default: true + - name: Set up R + uses: r-lib/actions/setup-r@v2 + with: + r-version: "release" + use-public-rspm: true + rtools-version: "42" + - name: Run cargo build uses: actions-rs/cargo@v1 with: diff --git a/src/Makevars.win b/src/Makevars.win index d416daa..eff6a57 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,6 +1,6 @@ TARGET = $(subst 64,x86_64,$(subst 32,i686,$(WIN)))-pc-windows-gnu CRATE = WebGestaltR -BASE_TAG = windowsv0.0.1c +BASE_TAG = windowsv0.0.1d TARGET_DIR = ./rust/target LIBDIR = $(TARGET_DIR)/$(TARGET)/release STATLIB = $(LIBDIR)/lib$(CRATE).a From 268bf9e403a32a77e1e3ab27dce2bf40e8e280c8 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 6 Nov 2023 15:34:40 -0600 Subject: [PATCH 64/82] revert back to msvc --- .github/workflows/windows-compile.yml | 26 +++++++++++++------------- src/Makevars.win | 6 +++--- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/windows-compile.yml b/.github/workflows/windows-compile.yml index 4c4ce58..431e7ab 100644 --- a/.github/workflows/windows-compile.yml +++ b/.github/workflows/windows-compile.yml @@ -13,7 +13,7 @@ jobs: - x86_64 - i686 - name: build-${{ matrix.target }}-pc-windows-gnu + name: build-${{ matrix.target }}-pc-windows-msvc runs-on: windows-latest @@ -21,14 +21,6 @@ jobs: - name: Checkout sources uses: actions/checkout@v2 - - name: Install stable toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - target: ${{ matrix.target }}-pc-windows-gnu - profile: minimal - default: true - - name: Set up R uses: r-lib/actions/setup-r@v2 with: @@ -36,14 +28,22 @@ jobs: use-public-rspm: true rtools-version: "42" + - name: Install stable toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + target: ${{ matrix.target }}-pc-windows-msvc + profile: minimal + default: true + - name: Run cargo build uses: actions-rs/cargo@v1 with: command: build - args: --release --target=${{ matrix.target }}-pc-windows-gnu --manifest-path=src/rust/Cargo.toml + args: --release --target=${{ matrix.target }}-pc-windows-msvc --manifest-path=src/rust/Cargo.toml - name: List files - run: ls ./src/rust/target/${{ matrix.target }}-pc-windows-gnu/release/ + run: ls ./src/rust/target/${{ matrix.target }}-pc-windows-msvc/release/ shell: bash - name: Create Release @@ -63,6 +63,6 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: ./src/rust/target/${{ matrix.target }}-pc-windows-gnu/release/libWebGestaltR.a - asset_name: libWebGestaltR.a + asset_path: ./src/rust/target/${{ matrix.target }}-pc-windows-msvc/release/WebGestaltR.lib + asset_name: WebGestaltR.lib asset_content_type: application/octet-stream diff --git a/src/Makevars.win b/src/Makevars.win index eff6a57..0e327bb 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,9 +1,9 @@ TARGET = $(subst 64,x86_64,$(subst 32,i686,$(WIN)))-pc-windows-gnu CRATE = WebGestaltR -BASE_TAG = windowsv0.0.1d +BASE_TAG = windowsv0.0.1e TARGET_DIR = ./rust/target LIBDIR = $(TARGET_DIR)/$(TARGET)/release -STATLIB = $(LIBDIR)/lib$(CRATE).a +STATLIB = $(LIBDIR)/$(CRATE).lib PKG_LIBS = -L$(LIBDIR) -l$(CRATE) -lws2_32 -ladvapi32 -luserenv -lbcrypt -lntdll all: C_clean @@ -15,7 +15,7 @@ CARGOTMP = $(CURDIR)/.cargo # See https://yutani.rbind.io/post/some-more-notes-about-using-rust-code-in-r-packages/#precompiled-binary-for-windows $(STATLIB): mkdir -p $(TARGET_DIR)/libgcc_mock - curl -L -o $(STATLIB) https://github.com/iblacksand/$(CRATE)/releases/download/$(BASE_TAG)-$(TARGET)/lib$(CRATE).a + curl -L -o $(STATLIB) https://github.com/iblacksand/$(CRATE)/releases/download/$(BASE_TAG)-$(TARGET)/$(CRATE).lib # `rustc` adds `-lgcc_eh` flags to the compiler, but Rtools' GCC doesn't have # `libgcc_eh` due to the compilation settings. So, in order to please the # compiler, we need to add empty `libgcc_eh` to the library search paths. From 8be12bd51ea4a97d15f24a4f0999dc89f8f67ff2 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 7 Nov 2023 13:55:21 -0600 Subject: [PATCH 65/82] add more multiomics ORA --- R/multiOraEnrichment.R | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/R/multiOraEnrichment.R b/R/multiOraEnrichment.R index 87e3df2..5d398e3 100644 --- a/R/multiOraEnrichment.R +++ b/R/multiOraEnrichment.R @@ -1,4 +1,6 @@ -multiOraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10) { +multiOraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10, + maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", + fdrThr = 0.05, topThr = 10) { for (i in seq_along(referenceGene)) { referenceGene[[i]] <- intersect(referenceGene[[i]], geneSet[[i]]$gene) geneSet[[i]] <- filter(geneSet[[i]], .data$gene %in% referenceGene[[i]]) @@ -24,4 +26,25 @@ multiOraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10 refG[[i]] <- data.frame(geneSet = names(geneSetNum[[i]]), size = as.numeric(geneSetNum[[i]]), stringsAsFactors = FALSE) intG[[i]] <- filter(geneSet[[i]], .data$gene %in% interestGene) } + intGId <- lapply(intG, function(x) { + tapply(x$gene, x$geneSet, paste, collapse = ";") + }) + intGId <- lapply(intGId, function(x) { + data.frame(geneSet = names(x), overlapId = as.character(x), stringsAsFactors = FALSE) + }) + geneSetFilter <- lapply(geneSet, function(x) { + x %>% + filter(!is.na(.data$geneSet)) %>% + filter(.data$geneSet %in% names(geneSetNum[[i]])) %>% + select(.data$geneSet, link = .data$description) %>% + distinct() + }) + geneSet <- lapply(geneSet, function(x) { + x[x$geneSet %in% geneSetFilter[[i]]$geneSet, ] + }) + genes <- lapply(geneSet, function(x) { + tapply(x$gene, x$geneSet, rbind) + }) + rust_result <- rust_multiomics_ora(geneSet, genes, interestGene, referenceGene) + disp(head(rust_result)) } From afb3cd693b827edeaf6b07a63e33be08647f4642 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 7 Nov 2023 14:06:58 -0600 Subject: [PATCH 66/82] add pkgdown --- .Rbuildignore | 3 + .github/.gitignore | 1 + .github/workflows/pkgdown.yaml | 48 ++++++++ .gitignore | 1 + DESCRIPTION | 3 +- NAMESPACE | 1 + _pkgdown.yml | 4 + man/WebGestaltR.Rd | 2 - man/WebGestaltRMultiOmics.Rd | 193 +++++++++++++++++++++++++++++++++ man/listArchiveURL.Rd | 15 --- 10 files changed, 253 insertions(+), 18 deletions(-) create mode 100644 .github/.gitignore create mode 100644 .github/workflows/pkgdown.yaml create mode 100644 _pkgdown.yml create mode 100644 man/WebGestaltRMultiOmics.Rd delete mode 100644 man/listArchiveURL.Rd diff --git a/.Rbuildignore b/.Rbuildignore index f1fa43a..27b2bef 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,3 +3,6 @@ ^\.Rproj\.user$ ^\.github$ ^\.lintr$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..ed7650c --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,48 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.gitignore b/.gitignore index 953d576..b1df3b0 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,4 @@ vignettes/*.pdf .DS_Store _site index.md +docs diff --git a/DESCRIPTION b/DESCRIPTION index 817545f..a1d64cd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,8 @@ Authors@R: c( ) Description: The web version WebGestalt supports 12 organisms, 354 gene identifiers and 321,251 function categories. Users can upload the data and functional categories with their own gene identifiers. In addition to the Over-Representation Analysis, WebGestalt also supports Gene Set Enrichment Analysis and Network Topology Analysis. The user-friendly output report allows interactive and efficient exploration of enrichment results. The WebGestaltR package not only supports all above functions but also can be integrated into other pipeline or simultaneously analyze multiple gene lists. License: LGPL -URL: https://github.com/bzhanglab/WebGestaltR +URL: https://github.com/bzhanglab/WebGestaltR, + https://iblacksand.github.io/WebGestaltR/ LazyLoad: yes Depends: R (>= 4.0) Imports: methods, dplyr, doRNG, readr, parallel (>= 3.3.2), diff --git a/NAMESPACE b/NAMESPACE index b2159f0..c54016f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(GOSlimSummary) export(IDMapping) export(WebGestaltR) export(WebGestaltRBatch) +export(WebGestaltRMultiOmics) export(WebGestaltR_batch) export(affinityPropagation) export(fill_input_data_frame) diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..fcaefc5 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,4 @@ +url: https://iblacksand.github.io/WebGestaltR/ +template: + bootstrap: 5 + diff --git a/man/WebGestaltR.Rd b/man/WebGestaltR.Rd index 0d84def..9e991d4 100644 --- a/man/WebGestaltR.Rd +++ b/man/WebGestaltR.Rd @@ -64,8 +64,6 @@ WebGestaltRBatch( ) } \arguments{ -\item{omic_type}{The type of omics analysis: \code{single} or \code{multi}} - \item{enrichMethod}{Enrichment methods: \code{ORA}, \code{GSEA} or \code{NTA}.} \item{organism}{Currently, WebGestaltR supports 12 organisms. Users can use the function diff --git a/man/WebGestaltRMultiOmics.Rd b/man/WebGestaltRMultiOmics.Rd new file mode 100644 index 0000000..a2f8652 --- /dev/null +++ b/man/WebGestaltRMultiOmics.Rd @@ -0,0 +1,193 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/WebGestaltRMultiOmics.R +\name{WebGestaltRMultiOmics} +\alias{WebGestaltRMultiOmics} +\title{WebGestaltRMultiOmics} +\usage{ +WebGestaltRMultiOmics( + analyteLists = NULL, + analyteListFiles = NULL, + analyteTypes = NULL, + enrichMethod = "ORA", + organism = "hsapiens", + enrichDatabase = NULL, + enrichDatabaseFile = NULL, + enrichDatabaseType = NULL, + enrichDatabaseDescriptionFile = NULL, + collapseMethod = "mean", + minNum = 10, + maxNum = 500, + fdrMethod = "BH", + sigMethod = "fdr", + fdrThr = 0.05, + topThr = 10, + reportNum = 20, + setCoverNum = 10, + perNum = 1000, + p = 1, + isOutput = TRUE, + outputDirectory = getwd(), + projectName = NULL, + dagColor = "binary", + saveRawGseaResult = FALSE, + plotFormat = "png", + nThreads = 1, + cache = NULL, + hostName = "https://www.webgestalt.org/", + useWeightedSetCover = TRUE, + useAffinityPropagation = FALSE, + usekMedoid = FALSE, + kMedoid_k = 10, + isMetaAnalysis = TRUE, + mergeMethod = "mean", + normalizationMethod = "rank", + referenceLists = NULL, + referenceListFiles = NULL, + referenceTypes = NULL, + referenceSet +) +} +\arguments{ +\item{analyteLists}{\code{vector} of the ID type of the corresponding interesting analyte list. The supported ID types of +WebGestaltR for the selected organism can be found by the function \code{listIdType}. If +the \code{organism} is \code{others}, users do not need to set this parameter. The length of \code{analyteLists} should be +the same as the length of \code{analyteListFiles} or \code{analyteLists}.} + +\item{analyteListFiles}{If \code{enrichMethod} is \code{ORA}, the extension of +the \code{analyteListFiles} should be \code{txt} and each file can only contain one column: +the interesting analyte list. If \code{enrichMethod} is \code{GSEA}, the extension of the +\code{analyteListFiles} should be \code{rnk} and the files should contain two columns +separated by tab: the analyte list and the corresponding scores.} + +\item{enrichMethod}{Enrichment methods: \code{ORA}or \code{GSEA}.} + +\item{organism}{Currently, WebGestaltR supports 12 organisms. Users can use the function +\code{listOrganism} to check available organisms. Users can also input \code{others} to +perform the enrichment analysis for other organisms not supported by WebGestaltR. For +other organisms, users need to provide the functional categories, interesting list and +reference list (for ORA method). Because WebGestaltR does not perform the ID mapping for +the other organisms, the above data should have the same ID type.} + +\item{enrichDatabase}{The functional categories for the enrichment analysis. Users can use +the function \code{listGeneSet} to check the available functional databases for the +selected organism. Multiple databases in a vector are supported for ORA and GSEA.} + +\item{enrichDatabaseFile}{Users can provide one or more GMT files as the functional +category for enrichment analysis. The extension of the file should be \code{gmt} and the +first column of the file is the category ID, the second one is the external link for the +category. Genes annotated to the category are from the third column. All columns are +separated by tabs. The GMT files will be combined with \code{enrichDatabase}.} + +\item{enrichDatabaseType}{The ID type of the genes in the \code{enrichDatabaseFile}. +If users set \code{organism} as \code{others}, users do not need to set this ID type because +WebGestaltR will not perform ID mapping for other organisms. The supported ID types of +WebGestaltR for the selected organism can be found by the function \code{listIdType}.} + +\item{enrichDatabaseDescriptionFile}{Users can also provide description files for the custom +\code{enrichDatabaseFile}. The extension of the description file should be \code{des}. The +description file contains two columns: the first column is the category ID that should be +exactly the same as the category ID in the custom \code{enrichDatabaseFile} and the second +column is the description of the category. All columns are separated by tabs.} + +\item{collapseMethod}{The method to collapse duplicate IDs with scores. \code{mean}, +\code{median}, \code{min} and \code{max} represent the mean, median, minimum and maximum +of scores for the duplicate IDs.} + +\item{minNum}{WebGestaltR will exclude the categories with the number of annotated genes +less than \code{minNum} for enrichment analysis. The default is \code{10}.} + +\item{maxNum}{WebGestaltR will exclude the categories with the number of annotated genes +larger than \code{maxNum} for enrichment analysis. The default is \code{500}.} + +\item{fdrMethod}{For the ORA method, WebGestaltR supports five FDR methods: \code{holm}, +\code{hochberg}, \code{hommel}, \code{bonferroni}, \code{BH} and \code{BY}. The default +is \code{BH}.} + +\item{sigMethod}{Two methods of significance are available in WebGestaltR: \code{fdr} and +\code{top}. \code{fdr} means the enriched categories are identified based on the FDR and +\code{top} means all categories are ranked based on FDR and then select top categories +as the enriched categories. The default is \code{fdr}.} + +\item{fdrThr}{The significant threshold for the \code{fdr} method. The default is \code{0.05}.} + +\item{topThr}{The threshold for the \code{top} method. The default is \code{10}.} + +\item{reportNum}{The number of enriched categories visualized in the final report. The default +is \code{20}. A larger \code{reportNum} may be slow to render in the report.} + +\item{setCoverNum}{The number of expected gene sets after set cover to reduce redundancy. +It could get fewer sets if the coverage reaches 100\%. The default is \code{10}.} + +\item{perNum}{The number of permutations for the GSEA method. The default is \code{1000}.} + +\item{isOutput}{If \code{isOutput} is TRUE, WebGestaltR will create a folder named by +the \code{projectName} and save the results in the folder. Otherwise, WebGestaltR will +only return an R \code{data.frame} object containing the enrichment results. If +hundreds of gene list need to be analyzed simultaneously, it is better to set +\code{isOutput} to \code{FALSE}. The default is \code{TRUE}.} + +\item{outputDirectory}{The output directory for the results.} + +\item{projectName}{The name of the project. If \code{projectName} is \code{NULL}, +WebGestaltR will use time stamp as the project name.} + +\item{dagColor}{If \code{dagColor} is \code{binary}, the significant terms in the DAG +structure will be colored by steel blue for ORA method or steel blue (positive related) +and dark orange (negative related) for GSEA method. If \code{dagColor} is \code{continous}, +the significant terms in the DAG structure will be colored by the color gradient based on +corresponding FDRs.} + +\item{saveRawGseaResult}{Whether the raw result from GSEA is saved as a RDS file, which can be + used for plotting. Defaults to \code{FALSE}. The list includes + \describe{ + \item{Enrichment_Results}{A data frame of GSEA results with statistics} + \item{Running_Sums}{A matrix of running sum of scores for each gene set} + \item{Items_in_Set}{A list with ranks of genes for each gene set} +}} + +\item{nThreads}{The number of cores to use for GSEA and set cover, and in batch function.} + +\item{cache}{A directory to save data cache for reuse. Defaults to \code{NULL} and disabled.} + +\item{hostName}{The server URL for accessing data. Mostly for development purposes.} + +\item{referenceLists}{For the ORA method, users can also use an R object as the reference +gene list. \code{referenceLists} should be an R \code{vector} object containing the +reference gene list.} + +\item{referenceListFiles}{For the ORA method, the users need to upload the reference gene +list. The extension of the \code{referenceListFile} should be \code{txt} and the file can +only contain one column: the reference gene list.} + +\item{referenceGeneType}{The ID type of the reference gene list. The supported ID types +of WebGestaltR for the selected organism can be found by the function \code{listIdType}. +If the \code{organism} is \code{others}, users do not need to set this parameter.} + +\item{gseaP}{The exponential scaling factor of the phenotype score. The default is \code{1}. +When p=0, ES reduces to standard K-S statistics (See original paper for more details).} + +\item{gseaPlotFormat}{The graphic format of GSEA enrichment plots. Either \code{svg}, +\code{png}, or \code{c("png", "svg")} (default).} + +\item{networkConstructionMethod}{Netowrk construction method for NTA. Either +\code{Network_Retrieval_Prioritization} or \code{Network_Expansion}. Network Retrieval & +Prioritization first uses random walk analysis to calculate random walk probabilities +for the input seeds, then identifies the relationships among the seeds in the selected +network and returns a retrieval sub-network. The seeds with the top random walk +probabilities are highlighted in the sub-network. Network Expansion first uses random +walk analysis to rank all genes in the selected network based on their network +proximity to the input seeds and then return an expanded sub-network in which nodes +are the input seeds and their top ranking neighbors and edges represent their +relationships.} + +\item{neighborNum}{The number of neighbors to include in NTA Network Expansion method.} + +\item{highlightType}{The type of nodes to highlight in the NTA Network Expansion method, +either \code{Seeds} or \code{Neighbors}.} + +\item{highlightSeedNum}{The number of top input seeds to highlight in NTA Network Retrieval +& Prioritizaiton method.} +} +\description{ +Perform multi-omics analysis using WebGestaltR +} diff --git a/man/listArchiveURL.Rd b/man/listArchiveURL.Rd deleted file mode 100644 index ea75376..0000000 --- a/man/listArchiveURL.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/listArchiveURL.R -\name{listArchiveUrl} -\alias{listArchiveUrl} -\alias{listArchiveURL} -\title{List WebGestalt Servers} -\usage{ -listArchiveUrl() -} -\value{ -A data frame of available servers. -} -\description{ -List available WebGestalt servers. -} From cdda9683c35a376a641e5e4a47a682c9b8b384e5 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 7 Nov 2023 15:25:48 -0600 Subject: [PATCH 67/82] add installation page --- .gitignore | 1 + DESCRIPTION | 4 ++++ _pkgdown.yml | 1 - man/listArchiveUrl.Rd | 15 +++++++++++++++ vignettes/.gitignore | 2 ++ vignettes/Installation.Rmd | 39 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 man/listArchiveUrl.Rd create mode 100644 vignettes/.gitignore create mode 100644 vignettes/Installation.Rmd diff --git a/.gitignore b/.gitignore index b1df3b0..793da79 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,4 @@ vignettes/*.pdf _site index.md docs +inst/doc diff --git a/DESCRIPTION b/DESCRIPTION index a1d64cd..560a8ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,3 +27,7 @@ RoxygenNote: 7.2.3 SystemRequirements: Cargo (Rust's package manager), rustc v1.63.0+ Config/rextendr/version: 0.3.1 Encoding: UTF-8 +Suggests: + knitr, + rmarkdown +VignetteBuilder: knitr diff --git a/_pkgdown.yml b/_pkgdown.yml index fcaefc5..69b3674 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,3 @@ url: https://iblacksand.github.io/WebGestaltR/ template: bootstrap: 5 - diff --git a/man/listArchiveUrl.Rd b/man/listArchiveUrl.Rd new file mode 100644 index 0000000..ea75376 --- /dev/null +++ b/man/listArchiveUrl.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/listArchiveURL.R +\name{listArchiveUrl} +\alias{listArchiveUrl} +\alias{listArchiveURL} +\title{List WebGestalt Servers} +\usage{ +listArchiveUrl() +} +\value{ +A data frame of available servers. +} +\description{ +List available WebGestalt servers. +} diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/Installation.Rmd b/vignettes/Installation.Rmd new file mode 100644 index 0000000..07c6dcf --- /dev/null +++ b/vignettes/Installation.Rmd @@ -0,0 +1,39 @@ +--- +title: "Installation" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{installation} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r include = FALSE} +library(WebGestaltR) +``` + +With the new version of WebGestaltR, we now require Rust to be installed on your system, as the core computations are written in a new Rust Library. This allows for up to a 15x speedup in GSEA. We can also process more datasets for ORA with less memory usage. + +## Requirements + +- Rust 1.66.0 or later +- R 4.0.0 or later (Windows requires 4.2+) + +> Apple silicon users (M1, M2, etc) need to have the arm64 version of R installed. You can check what platform you have installed by running `R --version` in your terminal. If you see `x86_64` in the output, please download the arm64 version from [the R-Project website](https://cran.r-project.org/index.html). + +## Steps + +1. Install Rust according to the instructions here: [https://www.rust-lang.org/tools/install](https://www.rust-lang.org/tools/install) +2. Install the R package from CRAN for the latest version of WebGestaltR + +```{r eval=FALSE} +install.packages("WebGestaltR") +``` + +If you have problems installing the package, please [create an issue](https://github.com/iblacksand/WebGestaltR/issues/new?assignees=iblacksand&labels=Installation&projects=&template=installation-issue.md&title=). From db72160e8d0044d37a715c54e887843754d38de2 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Wed, 8 Nov 2023 15:19:36 -0600 Subject: [PATCH 68/82] add better checks and logos --- .Rbuildignore | 2 + .github/workflows/check-standard.yaml | 49 +++++++++++++++++++ logo.svg | 24 +++++++++ pkgdown/favicon/apple-touch-icon-120x120.png | Bin 0 -> 7127 bytes pkgdown/favicon/apple-touch-icon-152x152.png | Bin 0 -> 9095 bytes pkgdown/favicon/apple-touch-icon-180x180.png | Bin 0 -> 10849 bytes pkgdown/favicon/apple-touch-icon-60x60.png | Bin 0 -> 3386 bytes pkgdown/favicon/apple-touch-icon-76x76.png | Bin 0 -> 4390 bytes pkgdown/favicon/apple-touch-icon.png | Bin 0 -> 10849 bytes pkgdown/favicon/favicon-16x16.png | Bin 0 -> 1260 bytes pkgdown/favicon/favicon-32x32.png | Bin 0 -> 1798 bytes pkgdown/favicon/favicon.ico | Bin 0 -> 15086 bytes src/rust/src/lib.rs | 2 - 13 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/check-standard.yaml create mode 100644 logo.svg create mode 100644 pkgdown/favicon/apple-touch-icon-120x120.png create mode 100644 pkgdown/favicon/apple-touch-icon-152x152.png create mode 100644 pkgdown/favicon/apple-touch-icon-180x180.png create mode 100644 pkgdown/favicon/apple-touch-icon-60x60.png create mode 100644 pkgdown/favicon/apple-touch-icon-76x76.png create mode 100644 pkgdown/favicon/apple-touch-icon.png create mode 100644 pkgdown/favicon/favicon-16x16.png create mode 100644 pkgdown/favicon/favicon-32x32.png create mode 100644 pkgdown/favicon/favicon.ico diff --git a/.Rbuildignore b/.Rbuildignore index 27b2bef..7c7c2bd 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,3 +6,5 @@ ^_pkgdown\.yml$ ^docs$ ^pkgdown$ +^logo\.svg$ +$build_hash\.py$ diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/check-standard.yaml new file mode 100644 index 0000000..a3ac618 --- /dev/null +++ b/.github/workflows/check-standard.yaml @@ -0,0 +1,49 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true diff --git a/logo.svg b/logo.svg new file mode 100644 index 0000000..718ec30 --- /dev/null +++ b/logo.svg @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pkgdown/favicon/apple-touch-icon-120x120.png b/pkgdown/favicon/apple-touch-icon-120x120.png new file mode 100644 index 0000000000000000000000000000000000000000..a5f9104b99fdc2e228c4c00ef705d8e7ddfde35d GIT binary patch literal 7127 zcmZ`;WmFW-*IrmUL`eY&rAxX)=}zeeDaoamE@=f^Sdi}SZdgF1LmGi)>4v34;^p`8 z{qR3??sL!FnLG2GdA{5^H(EnY9v_DS2LJ%zD=NrnJ$vwfhK2Er!d3!P&koI2QdJTF zsExW^b0~Nh0Py4j0QSuR0Fj>n02wHwO;hZ70mD*R zUIy^=-^l4KPJE7FdMK*OV(#E#k-Wf;l6IU004TtUGLqWhzekw{2^sUAt3f+~%AC{U zY#2a7+8z=BOH?|uft%7cAsef*d8t-zMv|EnwB%P6yP7QjaBTV6PgWM6=<;?0P%|2iz$Ep_-DpXp>ghX z{di(<+`5y63&i7s0sbc#1uyRs+*l9bngybwRFqXG^gZ0PbTxm(n?WZ;54iW6xVm@% zhWdTm$rcL@X2wHl4+((5x4Y$o-@U9+0->=3Y(uVtuh9*(lS~uDX~D5ILCAv+WQq>} z_zma{xCS%>+k>UfT{w2z0!D2tE%772sgXS);FTWzBr;ioOz(OR%sMO{8 zd_c<(nXf96@d5Rm4*@HnixGJwnwBzl3ndsJgr$Pe<`Uv*?2}Ko3F^e~Cu?DbeKt{Y zpGTuf^TI1q+d4#dMrRDMcr(NiI%^__HWMss{-OnK6qN*6i#pIXTmN`MnH6+~B}7;h z9GUjvA1JJiQc5rs1&;E_#LrX)1S76N7f)MEa*_|Y-=;ny|;yD^sIr?onh<{U27(e7;#o)d=Dx2r+$ zKfu4qh_Ld_+y8x;bTN|TJsy10Y_Ad2ft@#~W(au(mPy*Gy0AVoQ`ddWu!OSG-V&HkEUq~bg z91|`fG$u@Y0Iig`+hWqtA&!_?To?!uAN#P1WjTK(-MWZLMYNoZFCl~vV@4VYmF_nm zwa#7R)O@R-t_Bu^UQn(;I)9JDf%h2BZS^FpdGlPb>yQt7GwH`NvL~N)bXJ%f3fiL1 z;U~XeJ+Ok#5PYSW_Z6yn>AynL;Q=phbd1r;XiQIaX<<=LBIudspPrevC*v%SUXjmo zscU5&>p$5A7FrMj${s{SIazD(>Y4cgaS zNCQML*%^fOCAeq6gTJHYQw~fl*n`J|^Q-FBQ(?>3xMMUUKAdrK)KQ z2ZhNQ*ZYRr&bKW(Eve4V0_i58LnF4|*My=nl`Ojo$&?b>tCy2i4d7;4YE^5#6aENO z*388x%iD`FWm7u7K`&c<(B`)WNgKMY}xwg!)~kVW5JC)RkqoJ3I@*H-DJDKC~1+PV)EU6*lo z7i%4ta%hdA`*I&*P4jQG_AVZ?lgF2}d23yAE}M^Jym2~cz!=c+_AU-=QrK(z)GfaE zfFNVwA>`$DWNNpZTH-z7AMvGT_YP*MV`pIoniQ6>LeQ>y){a%TGX)5EQHRk#EL zTfC-kO@_cLbd6m_sp|IPV>9NJJ^Q{>D_pyI^GlJ4Ix`7Wa7XNNMH809RV5TXlBfmU zrJCUTAO3E185!Ve0eX%BmeFk|2 z6e?^z5k-cO&bT_n$sEs67c>*-pDP4gAL$_j^7}Jt3-gNm81f4(^26)w0u``u@&cKE z0=}W(v2EsJbY$2s>WwW`b$!_~_yq~yaUbZ5nOBg5^!=#*tOs5UE$cB$6{1f~PNC=uIs5H7kJ@A>_E$+! zf278ze?V*plx21HxOXHnfR-fG5^hbMI5cq^ew%@K>Ty;NYCvoB7xX|3rC??uAvOD?H zu8JNknK(MP<&?N7nb0iasGIoUi`PG$N2Mr1ieK2w&^xSB{#{g0RXXv4uljq5WBKf< zqnCxZK)q1b*ldhpEWWOw#vaB^>|jw)l3?KcmwGBpLGHdMQe{L_^Lbd6(1$Ar#Mja zh;_3T=Ile2b~;N-uLq~{D(43d?|ZeqECN|OYlJMxD5+DOl$sEz%A7k*AK!Z{NXo%j z$;aQnG51uf0LVq+gsjw$OcY)DinJ}+RpAxg#vNmK9)>}U$A2sl$h%OXF(fJ>P2cnD z@RF|5+!`Ja&y1vb*@7P-ECigXXGoTy)h_=9yKmO#xPrDjHqL4j@9~xj7wG($W;U(J zy!=-DM1S?T1P50Srv&? zTyv+Q{Xs4pEe>!UicRFss6Q<3od`{vug2QGN?5+sK+AS>qoq(hjtn=Wr)y_OIVAzelD>I0T=)8>UNx$#h(q-3LbD(S@usXAwL5yGidf^aA;X~FQOiAd zGTynPR^ZwY^yy+S{jyR9Qs2(A7b0C9Ws-b$s~w7wqO8IuBZ@7Cw=C-pTn)j@p4zAu zcOT>IM3v79%(YfhZV_AKT)S$kT8gK$s@vt<%34WGQ*Jd?a&>3ra|`I$s?7bl0{PZY zz$eDxEDK?PEZ;-EWzCJHaEUMXPGkdvP}xhECjxd(%T2-Labf`tP$oY%0o1Vd7NJ{*J)QzCJ7dx?xA? z_9LRCV*XUhmM7Yhh498cYuHG69C%5r)I8r*L3Q?*P0xQM9kQ*4kd!2!B#M7J+t5|A zPyGv#{hCb!-eqx8tiz03W|Y9cchBfN+O?L8)x~mL&t&z=FGE)`^<2<%6W=|eM0=aK z#W=K)S{yE*Kx&da6|V25)20^`GlKmAZ`95pC=glmX&(Ngn###wJ`8rUt{c;qHh<^! zZm3|t(55J#-UnrNJDhi{sFV?v>R#4L4R7c0M8o*_FBW{DOAy&3kU z-#gN%aNhEeJ+vaB{)~-{V@djM%KYT}tRzj9WPPhn9km|iJx4aHdG@9R4B6CGn^S(I zf^EIQDb0rPK+Q5!X*=-F!!2^Q0~!%*ews&JzI|EKvN<^M+StE!2L-)nK6g`(wD&OC zx@yqDO>I^!{lK|U&>o&@E&=9sVfd4VXL1eeBb+^ ziH?Ua6?e715pj9uslcI^W3~G)o!z3TTO!HdW^OtIJE%0X?0c>`9UX~s8UGXXG$P1x zSHvTX|6_Ppw<1ds=0W1A#m3rE6#7?F?~m+SW3(8O3_K$}@!kyZQh%D1R&;&77N=JM zG)l6S-{YCQziw@Qyl7SpCd#EA`TH;%3XL(tfAoEqDBP|7@D>({af59_+U7`|3K$WXa=P;I+D;>L_BvVl*$L>C`*C=o8f zPneVMT5!a|*jAT_Aoz2f_MTdk_#HO=)1h$#1pGLQ@Fr45FC0P(;U8)-b6BaPk&TFc zR|fbn`xIaxDR_vSbE%=>3DS-*eT%#%xwJ>-#eh;h5aHv{#>+FVro zJ7LJ9`|{dA4;o(8+Ts%(2(WBSN522SQ#o0u@#0hmu;~I7gtiDZHlAoadg5& zc@cvU=OHRmZOhNrizACm4(S}tb%v(5{27~F-O0xK&D!4gvroT6BD+zmtfmfHI(Cgf z<*G+`OwVuqe*a8cFyhzCgywc$w%T7d76SwFPG&qs862Kx;A06Vg4*_I>@P16(&a`-5wGHl)xGg$0@_WkQ zRnh)+hc(M?*;$~kcjD%kG*x~$0J^init4%#v(w+61 zv~^22RH}r^G4R$GTe|N{JCsdCVYryR&f1vjK2i~OE-emx*rPe`TYcmULedn)&!|3{ zWnDU_8S?b?MWc;{hnpT>k!cz5oUc*dws~JgRIpq}FO!y*72rFtrVzTHTCHbFWDiZ% zEqP90Bm6$j><#f4y}Jo#>INvhxbP1CM*$J!g-JvH6BIUuY6+99>d6!^@r$Cd z;Tzz=hjbo@a?3H@0}6!0=y~d9=-fD&bdqQw_Xh^_+=KZ}v| zL936kP4w7v;gu(7!ho0d87q*iwAGZXEEffdpO=gg<9>d>Z8}UolYDwF4;eAgtBgCw z@{T3WKLz5iCLdWM>krobtzQZ@ZF4kZp*xmA*lo2kM@E;!e;H>CjV?nV6%3vMVN!^| z5CYyk$6!xY(78T^W{Q_lj_^?PYm~t+Nyp0SoQKawkH(ZT3UZ!HXPyAJ%aM-mBJ0Rm ze-m!A|86K7ZIEw!_j-75rMB9-{bk1w!xYDV?Gkf%LbHsy{A`znDx{o+ zHqHXP?zpMi`{qac;`5Hd=*~LLIu~S_{z3DdQ)1foI5#5Y?N8OG@AQiFUJ+tH z!|hz*-=L&p3a$1^{Slkzr8klhJC)VsMpKBj>43}+t_+rjHn^x)M0<9E-LtvxQ z`@f8-KNw7#w2T^x>Jo-}rEEx9b(1NTl1*;EHCY6w_^)B6HU)Wo)+6-47-F&`-Rr>g zj@m2|Z*LOFuP&vUzWv*hS&ovzBNokY;h=@bR+Lfw(=FF>INX|rl%gmho*wVhJuT6M zgQC9!;Koa4VBIX*|EWdyej^vEZ>6l#zSE=|?olWaA)q*;GNc7%*IJKIYH%zxZPco? z(yBE4RB2%ULkUvF#?qLY8F33r$)sM6PI!m2VlFNVHZs zZ$cj$%g8e#YRl}j_!n;@hDek6v=rYl#A)Y66#l81vMb?x$M#*HK`DvPfzZc*|2 zx%=naa!w6u$8wq8{_>8BsAK!%fT)2uc6+rQ$#s|Eik#%>U_~)`sM{yjAsK7$qt1?I za0Tc~nGFO~pG79~PLWJ~n{>79CA*pVXF?Y~;ueFI@q{J_P8-Dh)hnIOcAuvvVXktY zsnDDKy|ugJ#UYQ9N4eA9nf@FR$Hf*jzu8|qABH(QIuZP&%{s7u_NpGyuiNbs5mj1N z+MZa=Y8_5QrOSJ4gC5ZqSn0cy+67JDv1WI<~iC81FoaXl_$PkB-&D+ba9JIuzngE72we;!fI4rMy0}H?Gg+-D3TUCUC@hhhQ|FA@5 z!?mukX?Bz#{Ek?UKT@#liV7^$=F9 zcyI9U2OB%Zi`*}yVmFHWj{lrm8j%kzN8%jqtLm>2KVj(#ew$LwvG^dxwbw|4D$btD zI`th_fvW)@{Oxy&J{1GMs^mxDE7yKA`BC1k?z<)}kUQ_S0+E4z#DR4;mb22ebhsRo z`%;|F#UG~lhOT>BjMD~ne`t!4Ue$lK@@AMAB&t@%g3SRnRC3RL}enLdI>)8-I5hlq(%+I zB|A2H>RHK=En8Px^{25lTg9K%CFC{|i9YvTd;CJSNlL~wxIsu~$Og(6s?>v}4kjlR zx1_>QK`26OT;)A(Y6J_IjU4$N^dlyL?-_a}S{7G?&2jsAM`3v(n`AxG&wV4v)j$C(sA R^!W;aqO6)sm6Unt{{gc6-)sN? literal 0 HcmV?d00001 diff --git a/pkgdown/favicon/apple-touch-icon-152x152.png b/pkgdown/favicon/apple-touch-icon-152x152.png new file mode 100644 index 0000000000000000000000000000000000000000..46246db6fb70388ab5834a6711424dcdfd464826 GIT binary patch literal 9095 zcma)CWmFVSv|qXe>24&XYgwcvrMs7ImRLHZMY^R+bfvqOUQ#-wdj&~pl#qJ-KfVv| zoOkBT+=+ANH*u%#+*mCQC45|JTmS%oul!D4=b2{z=dm%LL&`ozre}ig@K*gT08pQd zcW;I9oMyCnr=t!41hN4DVUYm9zvry5eE`6R9{@PC1OUXIE1-bpw`)s07hqbeD#-($ z{&PiLrJtU2u)LMk6|na3u*nE;iBu%r0RUzWW%;*y{wv1?#wq!WKAWL?A*$}Pjh2{l z0CYcM04VkwM|_%s=_@+Z)F|H7h+hMIR-@i+Tdy+obc*QH7SnCoE=Ws{(s%TJeH)E^ zS8Dw!t-3*Ox{2HrEU&ZlniqicfI|1op*>2%F& zO7a@KKLb-rrk4!-^Z$v!2^wZ7reVPZp||mXvbdMO!cnzc?&$%TT8yYF2wMJREed44 zgN4y0E#+h!WMUC>;CbIKTkCGt??kvF)NaXF)#>Uet~3(?blf^ zA8HJ<0#XBMgD?}VBoo#}nqJ$LYV8&rrc4!W5&A+dUoRhVWbeTUE`pn25`QE7X@;kiZ}0~cuy}sIb|z<26vLG0G*Ax)))c-BEILCr691L(g)~@V&3JKQiU61ua>eGz4WYYz64} zZO}jqYW5v4d0WkqPtw1t-1i=fK%ml6)2)y9Pw@v$9d-qMtV890%E(4hi^|tkiR2V; zkx8^<-24O{k5VibRZFZw6n*}h@JA+I%tmwkCioo(;0|h?r|Zuk>VYI=pnalle&2)p zy}L!j&C^v7I<5 zEv_0+9K6*oK;)iRO(-^M`yv$`uF<4XHuOY0o(N+pMbE|8B@a}wzSq?#VCn^sg3hRR zJvrYI2DZMpcZImSnol3|xT8z>txZyKMu_xkI36QnGjr3Qmx zCnVn@5bFeGO{LFd%}pVH-b_0D3n6t3cdM{ehdy=fa7-u_%A+rTszhsHEk zM~a%?&9i4~dU2qm1fkxby6NGY?)94D$YEJku^rPg#^)KXjcT%s64`om|2jTYEU2aP zkBZ+Pg-!Njea}`V+pb!mfEsZ1UlAG@Iw@NwW-BW4P3^n;yPZ7DgU*dxhiPi-<_x*`bGizxFbsEQ z#5bIXVYY%ct7F`%z6I>2mc)n?wF?%|BG$>+vZenZ}zEv~ozRbzRN# zBM)2~68Q1cW^R&0(7-{;-Fc_c6a(WgVPoq?=-3I|zqOVv*>v=S!UWdO`y{6xlO;o% z!X*p3LV|hC)-S;Lu*T`zA1w>$C6*X?U|Sj!K8%SniVXvX6%g&-3E_Z6;Ad155sw@D z$G$|02+z&$oFFh|;^n;S9A(6q%$v=`56$EyaN2x$SbVB~bx?>9tY}d9sH0lS+dVY5 z?fgR8A~A2u;0*=$iwQEw?ZbR3-W{Z0C_K=h_0fx6^fz$vxdPG>BU0~t0QnNz7hDWF z<%|A12+QXqO`-ROcWtT*RnU-Azj%i$jk}BlMpHAZxG@Q3F<*q^_Yn$0Bdr}&UOIa& zU>WbN9R6^k_GaRHSoj_4-+zDNuHRh-FS@o*T(*RSN*n@5LN@71ePpI79KC4n36BWZg!;$22;28l@9+i zsb`Isk(Wo?Ou>=AG=|+)&44!?dHHfjsQU69Q&6N?{85~P_!SAIVWe`!?r#aN_r2W= z^wM*MDgrkXgdU5H_8dO%nz*TvLL4=59LH7iX^{rMp5b%Wo@n9dS*1&7El3(4;iuE-L1 z?7JRs3foJKUEf(7GjUlHpR218>b=VP zjB%EKBk@CcT6EI6dck3A2SwbFxvnJvIQ=~x=HU0@y~&!DFo=S8N?3+F!&AsMzg z_ra>@iJ_pO*~g%iq@)|)Jzm&@c%pqAKAuABeNnMLp~%3z2QJisLs4kuGi@2gc87KE zVl&htkNM)=Y}En;&DZxa%GTQ1Ffhb-cv257ahBCu2+=Ae64K%pQG;6r!*m}&DXD6- zPUe1J8{@xXf&Ie_)=js*?etXUXyhom1wPqbu+K+wStF=7gC+^DRlBvWEs40r91LZ~u-cCd=BTR$oyZA2Uq-|uj*9>2Er#d6Ugur6NR{zdUH|Doo$oK7uz3+r&%+pu9${Z*XhG@Ltl(==tP z72`pw4@>4>-y*~BNo6rf#HXG|w5xOmf$x42mrz#!V~cqEYpMzI^|D%_@115XQSe-j z!rf!bpwR!;UhUY{;*Og*60*k za{7nUbz>a!&FR~bC^+V~_w{W)G6K}jp`LzSl&A6N$$uSX6^=Sz zo4pw3Fp9@A$<$`Z1Hv`2O)Y1YcXCY0?+wQ&DZf>Tu648G3+{f8eTNSV=mU_Gh9xY}e z=#8WUq|ft9x83ZA7c&vO|gHLTJxUu zL&fq?n8U4t(@zryoH|hSogOvl{9Q1yO~igc3}I<^B&OBFMw0ePnh^xvzyvFS^L#2X zIubp{V?054(8DQeXhg=#Gj6`1hjT~>GFS6{X*QbZgYYQP10Gm?W-tR*yUbrm@M=L% z&6@X@k(>mmRkTifRvHqYqOvS+KZJLj@>h5h_)(t~r2pxjyIT5T;_ug1GzPhB*4HStOHPQc-F|-c-D%Lt905&1rsK(?;uu#FZ_)w(SCzGx`?pL_0-nY z0>04Ib(4#c<$SwoPCH;QRAaCFK}Ol9@1{n$v3!lxJ@91iP_eY>389Q6+%bF>AFl)+ zi-Z(?*s}1&Bxy=hB`yj7n|YKe{ifwzYy)RDLEVrdwDQ{Ef*7^2AAUc@k{&9OKSG|8 zQK%w6-O&eensYXAp)X*+jYJqib)}70PVUJpqOBpL=GMI4Jl85R4HXZN&r{7gP_Jws z_a85tr2jTAP5LpXMqb*z*6?mqI)}aqe-jD2qYvbK^Yw9Hi+sp+IWdX?Y!w4XLouGyQJ%h|TP z7DU7UJZ5$>wZ&~nF@V7O2URsj4Tbt+TCn?MNG(&1g!jzhpLtrtn(0R#o+X5U> zS0HW?a|ZLM?>%K{h4fOo{yX=#={c8h#O^CVNSFQwvagj9D+8~Fyqem^rL;V$=D)fO zkoK0y))6bO=Uo^ad{UOxGUp$>q!YWUWCFMuX6PaY9yRZEU$nf;ul-vIU#D9@GDm_h z5J7BR)pJR{G;hp;B|U&d#K!!yKOt@HZE))2-~4BiLc&u?mC$vm}I%FJ=tgx)Pi}moj`bGGxb2&ABS+U-XRS z1{3F4Z#Dc;|Mq(p$g2=8u6-heVE4F;|DD;N5ii}HqD(Me`=?NAgvW$()kYz*y={LY zQhKyRhm5dV#jRnZAk0~uqbBN@9p5c8s35O@QTBdVEqg=HTw}324M+b@q|Bp*Lw26# z+TjDJ5%{HPR4umIzUNCbFwsm_z~mj?IZRiz;Bjd5cXxG0YI^|3t;6`o@tMH5<~-yr zEl^aq1?BZ*oV(x9$D77aig#&>RxBf;r6Dx-N>!4$CKP5Ix8J?ykBwPF>VQ&d=QX0#@xH(GlQ#;L&y8Tg&Jta)o@)Cmk# zc5_@Vwh$LP;xF{E>FX8|I-nioX%x$;x9^8ed=wQ-%FX?u^LHN#&Rsu-`&%^N+(+?5 zfBeSVP-OTgHXl{g zd;*1+^CCs&{h3Tehd!yql`f2|B0IX)tQ#tc&V?89=H{_3<72bAhCk&`Y}68MFGkdEoX#mR z?+htrw%VP72J4G2TacNP82TE!qsgR3t5!T*rwQ;_c@Mp;%aB(FGNTuG@%=@O+v(@8 zHRA}@O)Nu!e?8mzjlfT>MQMvahUd+;pU>^%R`IXVl~m#vsT9Uu1KGat0_*2Fwz$+J zZ@&l9tP$`(;b%M2uK+sFl#)TCX2Z`4rG(XEZ*|XBArmh9CaI-9CG4ZH=N>ftBWks! zj!m~d2%<1y70E!(fBE+QT;PL{n#Ji8Z1X+dPo3j9RpYOv=1-V+^#9lyJL6YXuIbx5 z%jQ!s70i|z#eVy+7TN=CqGy-J1Ip7E#Y-B0$}{i0p){Q$8aeD;~=RoQdpokYPbays|87*TttBC(sFb7>p%txflM)4tvkin3G zjyB!j&3}GW&)Ks13z=SNVy#ZxYJMzZ6LeVTEC4E;xmx_Wkr!#}=~M*!xzbNi|l0_+;D-^MZYA`HoikgWEIuFNx$gn~8j``9NAr&ho>eQV^zZ*cFFP@i4q@yb zb_FjM$#d|QL;)Cqw=LAAh>{`9mb zH&Ghe5uBdrXv0hHr4FiM-+0 zPbj=$%I;CTF_)~>%^5w6f8;H`Or$K7N+3#|JLEWX%AabW-*W{g9Si4lMZ%^aKE3L(RjH*Hx#nXf24uT-*lf} z>lrJ2YSx1E5GrRoog_CFx{~Jw2+?$=RtIk9wI3mdV#kgLz7l8XLRQES}-!)h~{@6PQ;;5Q$x~u2WeU{wbY})SZf!Owv#^Zz_HQMQ|Np#9b zz|dE^SDSe2m|b~exy6Kwi&J~fMx6Sr<)*s1YU6`1ZM=LX1R7cEGRMBeaGr5PSA?KP zek;;Wh9A=Tf^U^|Uc}yDy|}szHc@d`o0?cl%g>?IC@n?X$|cj(3EGT1&$lxbe}S^> zEld=YI9^qn=0d#}S|CgGW9_w(va^8-1R;rXDe!4vWGTU-Nyn8p$b608YMWuB zB~Bx_2Zpy#AN?{3h00%4e4!5y zWFtE^I6*!%-7ID+xcr|t(r^CC`iS@o>0YvljPpSzd3=?hAD0<(+|2=y-DhMUfz)y; z$S#*ku6f8)MIrCtp!n(E46lN3Y`)cBUNX?aW@4qo@-$2gNaI^C@P%8%XXA@!yuaEC zYy(IBJ!6A*@F_~W=pMXEdTKe_f@b@8Hr(5NR7)d$mBeTzbqes3++r-q@#YtF3;62a zsa06oWWeo_lwq=)BR+a+hjSw^M7hR9WIx3GHKZBqW zpH%E5?*$X7UxOA$;*30ba2+d_M2@_T9F{V8gUuqc+|dJzD+~ zXi2Rz>b`>!T|s9Q%K3$EXl`F8mY-N@GDw`$?VHT$TXgL#e^|dto32#h<;dwiig>+C zt|nlZCfm)~=3*vj#r-xVZV$w>v9%2(l875I(=I~u74hfVR?(`#Uoq;~&&-Mye_+=A zRh4QoSQ3+%0WTpom2!v}9jE(q++MN2dCF0Y(3S4F3zXaRVk%OleIR03zX$9SFh>1KAEn+A3=$*x zVmH91Le1rr@XCj^!nk}zM|Mfw|t>$jyZ$FfOu>kE9*B{vsF7^vx`J@aT2AS znID6|Ve{m>9ipb+%;L3}7HZky*b?b;8Ie^{bEm#)+_t*i?=&ok64h5GX4TN& zntQX}(l%A%iLc){({JYI8UWVmZ$pbl%EzzUf|PVYXfPK`?TV=V%$TX4sTHM8xikNY zdF|^7Pvh*YL(?Un6d&(vVvE*lvp{qf=c<{N9bBAjg|t)SkN%^z>SLo3#$zYdrr6L* z?AmEX&~p9h4D_lv%iuzd7&Afxqc_BO$Wnl95FPQmojLg)S?=DS&ui*M? zqx8r`=bOFxJsN##ik^VVEws`wmCCn^ZaYm5Vd9{6GBE}15o77#vd+6c2XeQ-&{rco zRTr?Yj)4LkpT$!1f?&2^FG_L*_Z1uA`v?w%Wi;c*$5%wAR4>VsLbG$EKZ(%ZCw3?EZli(9JgeplmO5DL*5REG zJbZO*;6DoPh5Ahp~1ZdUrXIW@sxF`@UrzFOq09X=p^CGo1-S(xf4GbKNA7Rxm?Xxh}><-~l zE$CaG%rVt{aGjIn4b1}#7sCBtaR*F2El(?xKD-X6sRl`$QvYES0TLp@9A#%x`#IbGuu;L&;xw- z9!%JS5!kg^8tsu*bYgy1dKINIrKzIc4ZGjw?wHFq%n4Z=mKCm2g`+8PO<~w|z7u*H zvhkq5CHym+a^rhOcOW)JK50v|Pc0soexrGz8@7wMusKPuUGDQOIqZTyg15zFam(Mv zYT!{$FX5tsOatW_jp6(8hwhA3m9PL;4|JS$BBSxBD)|%63CaWXGegu(!?MW2oyN_3 zUsWuz``t!=Ar>tdMNhDJYQi|#moV{lTX&Bc;N6L3c(3?jseyAP`}^}?R7`nB=T2f zB2WJ=M!INCO|Jfu)_P^$AIamBhuBzk6iS|uJQPO25ytQ%+^WnZ^o4!HL-I+qJ5W2T zNrwwtSPx3K!^|6ogNnLL&#i`Od$gZe^%pi^qiu7rdh&}b5<(D(z!{5vMM>hXnk4j+ zin18gQ{+Xj$mMkB>#6``G{P#yL+hb!ue;fe!egr;*+9o1O*DyQNfHjkf2aZzL%LRv zA`v*>e16YM$S4UD6ZoM$VeXl_w6NE`uEKTm_NO$=YwBiVp2r^;=9RJye>y_qeWTvB^qp zQ7C!#;!_6hr(0TS9rikL4+Gb1uECZ}*EM0WQ`AlEYyrCbYA!RA{B7yZf)$i^i!M3%?~CtnNca7ZIjpl2uo@&&z&z`5QUE!Iqowdx5iH%ic@m zQs~i95#2U=3F-v~GONR9joeA(hWm`Xrf|PQhMQU9>gbXNs=WR)?6<5odzBS<@U#n_ z6A_Z{1HmNp9+`sF`{U#fN~WBmxnt*}{4g*itoBYzax$u?J%flwhe}o%U6#jt%qNGU zcd|h>K&i>|>L+jH8oL^0AWo8LeLkN-xJMh5C-yIeC(UfNy?=kKS= z5A^}Bkt$kC+^m75P5M@Ft72>C>TDw_bTWKAVxSdMg-t+gf?siP?DCJrjT+zn~D0fB+A_sGgvJn1HaD06!N$zZgIN zxI*IT|6*`;w{@})_`eytD1o}q42FSvM&3GBevD90cY7xnJ4WvSs2!sd)Y}FC@Xz1A zc!BLz5oCK literal 0 HcmV?d00001 diff --git a/pkgdown/favicon/apple-touch-icon-180x180.png b/pkgdown/favicon/apple-touch-icon-180x180.png new file mode 100644 index 0000000000000000000000000000000000000000..d7172e37c25f2148fb7dbeb6b04835635039cb68 GIT binary patch literal 10849 zcmbVyRahKNu=e5}AXoxyAV_d`cL?t8Zo%E%Ew~fh-F0yY?kqtUcX$8!{=0K=ZcfiL zQ!_nNU2k>ORMkj?f}8{zGCnc@06>%cDXR2A8~1nz=aV2I5GeLxYGatJo~J6Mc$7S@J7-SqJa1R zmfWtA#E%{XXGvKxgnbl5oKL6^RB2iO02Q>PsE~^L@@cl^Zym79hTlG@sH4QojFF|s zAGTF`pih-VY-D*{ES4@L_jA6jVVh=lz zC^nA;Vh<+(W+EvgKC5GpAy$*z*_chwt*o4lgssGU^$)VyyOn{t_EnAfHtxJ}{{-^; z<961zpMwJTe`^&`nul1FUu1F~Zu$WlIOvEtQi1^Y@xvc5M!7Kf+BiHrn*29KH8o2f zc!Bf=;5!>RBq_v7s70vbjyNO(>@R&0?GzS+NZin70f{|A5LrlWsSQbAP@JEQrhxSK z1J%EYD*{5&KD>(hhN!LITH#tTDYguWVaGdL4m~mwJ*IFiU?lwa{fWMArcpmZws}%_ z8*l>k$a(8tl}$wbVSC7Gk(QuWgBwN-?~6ts2fe5eop4_`FWh?E1kG{c;`rXcKCf9J zxiyZic79)aD4C&t!JR?d4n-{0Iz#D)=PdXlxf8k*K=Sytj!iLdvb$lnO*t3))Qga# zAusj#3cZ>LHP2V5KkY%9C>GE$XHMF|qAaYXy;1L0T$693+u8#6l7W;YXo1KV^pI~5 zb_2TVKhIQKA&i{wPx3fxc8t6a59-g3xgeycyD6b^V&U9Tva>~3KHJ}lgE46naFco9 zL%skv`=B^Wi0C~NIU`M|D9+uhZ#Vx$a&PtNiXdh%Qx-W0-8_bdwmW>Ph~h{9dYwMv zYLj7Kz+hZ=y5;qU$u~sUBY1qb?O@)4EIXGrAk986joc0$1ltzF5rdPS(fWJE)5S^m zL{~R_g6>L70M>XDCuTQ#YiM%a5MCd^%RWEnH0(IOBDduY_d$?W2gdu0I|u#zT#a0| z06;Q;+aCLY$zCwHYzTGaKx^sQ?#ccJ$Lzz>h+9;iz>Du$)=a%w=iK3x7kEV3ou|j| z=MU05z}LUB)0u97GlEtecZ@IQk33Bj_EFbuanN=EyGX_&DJ%a}f$nkhWR`2%E3_U7 zd*N@!wUw8T7&rSf*8rcOo&#E3*ZUyhUnfC-`&)U2e;6zy z+X0@NPayWl4v3wsJzK8TAwqS>*sqiqLOKJ&Uvh=jIa-qt-pww{wK6ok8>0kRm^DH`dat(;v|W)Fez77!ZQ}*-Y{gg+4M?dZt0t zwUzx$pc*ljRcm@F&BQ|M^e{9Bk~2rGfcp!*N3PrufM)L1TV&go8x<$2p< znx)aTqMR7`2Uge4td8fL6juwiTjxXfjB&4vz1|1YiNKq-U)5>4R(#}_R#ZK{v{oB3 zt&J3l)p(|IDBIFo1zfKpxD%oia#Y4&Qm-xS`-k#3|ELcu^zXj7S95S3q2g}fel zpX|ftbnogbp_4A%BkcQKf83u67Yb+a-+HSs0v4#u>|L3_Qy@+vEx~M4`YPwRN({bt zS2X+Xz7*oUFzU`uQ-pZvn(>CEjkh+5zm*OhkOd$+b{{vMZQI`%jM{667 zyUN=5y8DxIcW6m*@X@fn(K`3}Eu3xd5CC!6BcY4zIgpFi*5B<<$RDqVW&tmQMbSS@ ze@h%-4*CFD6V8>E)LiQac&8#1Op7QMAJ9>Wk?m!bx_oT7Zw}nT&>vb_z`zv@bPg{X zOl1GF@(TrT=NaEV9?iyFK0KeCqE>Np1qJ;WKM`Kfxu?SRf6B`I_^fG{DZtDw=n$pKyPP`{hI)u=zR z1zCD`jpJ2@U0kQjb!JgIrkMJ(g_eC3z1+^)g;LBbYxQbRIfx&bJ6>&wqXoRqk)mds z(B_F}?l5Wx2I=$I&nY*F5SVpwVwg0oufbC69d;7vS$fm{ z1Mw-;48iFzLdgzz)})mDkbkN=0-lZ-bBJ0Wg{mI5bge~f%M~S^s|+R3HnuWDQ)qXNOj@Kg*x(F(RncN+l9y# z4<}W`do0p*in$;c=Qwo(7_$LluUfa#>1rKoXNJ~Lbo#&@i%^vpiMADAnN$fqM%-El zeuu*MsCm+B49Q82c0yENu)xtlQ4x}8uy>a>-^#gc^llJ}$yHO(J=sp$sPw71yOU)= z1#03+TcM7>^EkS2kwJIT*_RUqY9^qs@#Q5RE$*!KBp)LmaS2RS%$tZ_M31Ai zF5S)?WCy@+12Gk4?!TMo2p#adh&1QW}f`bEN4ok-DhM^gp-$;`p(hY~0G57s~#Lmy}P)EwC5VyUr?XkN4+jrXHQq zjukugsy+xT*TB%W(Mz<@#8MFsCEMywMx=PC|8p0o37D}OoZHN(>n9bjAQh)!`%d`z zRyq*R-(8o6+wO};LQRCP$ zC1hac_XnEOUrsUE>A;4$2}Q4V-MZ6(VH|{!5nW83qNP;8*|`YS+PYAROiS}xS|0-> zl6CL)_PE(NdPZub!8+6jNOXO25Il2)KcjTIwWaGkoK~HNU@-fW3IE96)A~Ql#Dr$3 z<4xg}=N?&Sq_7>$H_qrJ%7Vem%C~jD+BF+uIEhC*&ozVLIC`+4-+{HS%TA^vxXUiE zU(!Z&G)vHF^UBs$KNsxw%+(og$jP|7{>q+NZne-jMb_Bs>a@1G?g;sMAywWFQktxq zA^)x+x#N7{NX67(ChKj!b8Jt4t2x= z$pQDwRX8Md4@qOncajLkSJP!_F5n9UR8XO{z9OV$mBDtomR+Ac?&m%FkEG>G4Y|ld z3Wo~cnXKH|{$i!k$|;-c-i?W?x0R1;CunH05k|HiC42<}A5?SS5$1UPvcJ&g1=|Ab z3xN;27Uk7KtG~7O>97r&*=!P*W@B7q(0UK6qPd{*kRmH0285{bCKzfxdvps~V%`=8%e9zzhGou)epyx(gCqHFpM=lnKV#4CvI zPuq`UQY2PUSFbug*4Ku;swCp0w{i{+P+qMBaiqW9%jIlH*N%MWX{u`w%2;Pq zo0iRXE;qqh&cOpiY_;=n&e+118EtfC@K?ze@&1uw4Gk`@*`~>1+y`a1qd<7MC4C8Z z%<$BsNLnq+75)wsHtK}*7|!nkJ*(pE$ih}~@VB#*)+}5=)X35rTc&#j*tpG|MGLYn z8%4B z$xFc))}3j{X|s>W>K3EEfw z)r6z@&&!Oht8uzLsfw{%R6Oqoc~PLtd|b~Jb2p*iB#SGsFVxi-o5BtL`=>Jc;KKCz zu6aTRhL&V@NV2L#XRn5LtuW#T;w2fcJYYl4r(5Zn1Ps;;vxioC8=Q`Z%i1xj%m zwF{dqU}@lKM}Ei1BWY0l6fY&B+VT(fk{3Y<7JcisNbZy?8R>EgCw7glD#5e9$EBKI z?fZ4^?+qwXPe2LZn+EyMmZJ9)-Tv3<3>{|!edZ~W1vgbqK)a2ff2tajRIPiDR+zUx zD|!Q)w=cW09bIMV8zONNv)GO8$YHbjvz^@>usbl`zN85m?sFwFoqs8uz`8c~_xmYl z^XH?c>6L}JXF9t4!HVvULN z+*zttqOOSjJ6lm+{W*o+N~}HC2_@Uw7dKJ#2LwtYs8bI- z=dpLh*1{LRzWQG-m0y69d%UHegQu2N~ zjv~*<>3NrAQhY@J=rYLde&WH^Ev7YxGw@tP`UE{huhjRpG z-}fW-es~W({;cuC3U0|lPI0FK8XS+uK2AFl1|$|U>}03e&QV+YqIdr zU&0HNq*t8>DIu#`S9XT9B4Zk{?8=77t1G4RNzX?X(v^LiqR-mzT&1-2eBlmq#Ip^# zLq0PzMLG$!pnU8}QfRkePlM{-F`5t)EwDq3=h~G0m~$4(Y@}niLgt0|QQ9EWpV%5v zjxA7UX2g!c$&T$sijrqJ4!2gg#p+IHM|uv-l!!Lxr2;^H{pQb-!b=uXw`4)JTHDzV zJ-VT9>Zo0)s3M15EJY`44f|eV2k-%6qQ+k>J?s$Xdi81yM~CU&i0#6B5_?T&(Ef7B zVWKtR`nglr5@_$|q+~AA!w1m#%fZ!9u=JwzI+{q)-w_5d!35!6IVRFKJrtDi@?hSI zu4N<9eq1L=;%rox?Rv4Qv9iI!+mo8hnj1tt*plc#_xP8pph-nx4hW0O5-=tMFG)KK zP$Xxsf|N!4_42V06tnTNZh^lzNn-3M)0O6TFaITtBkz)2jiAGY{maB;v7IEV_1{1W z7`2-xKBHjMe?;6rB@dk-e19ZkN8HcGyMsbZ!MJTkqcFbhRDAEd2{Vwr@4RxNFh`o+ zC6q0+dfUu0UpJG_Wh($2pcL6YU z8EdyWN?KZGX*^5%QHRHySgS~H;Giq)>o%DPV@I&I{=A{N(j%~al#i=C$XX?|fpEc` z^xBNDE9}|*%-qfsC+&Ji;V&2gu8{=8z4i$&W2P@iD_Q=qd4H_?yP>aZC}Qm263{g? zp0?fEZUpl=N?c?sFS2>jQH`OEB-lEkgU8mZA+S@iHvVsA$j$8f_fpZ~uP+89Y{P1f zzbyjJBNr@szJ`6Iow1iX&{I7M)5Vi54boHbfF@?p-jpc3WDMZEmU z2n~yDKMZ%K-~mEHD>wmsKCDe`{?7&!)DoD3Cog?LOJr~l1gp(WG^Ryt^X{ejRY>$t zZ0g=s|1KZawfK!k6NP`;=yU#Cx~ z76Il~7g|`SgAtzpz!9pAO+V%>(#DQ7T5A5fzkpo7AB_zQ<44Tf*!l+k9+6o>1ql>? zzGdOJ@wbG?E3Av|v3mdDFnVv$H@W8PhV6@UAi8i(q1s=fpj&Wu4HCk0;guiLs>jka zlJiPuMWC7%sHHRklHQI#=5mpk6n{hX#a%*s)mR|m40_)1?-rS)M{rfEpL#O~{rxW< z1%unWmxVXPs+8u2o_@Q#Q&o*ewCjaGk=*n-r4r4ws#KboaicmjUpdR>7!w9zUH>t7 z(C*36UO5B)v({BKN34N>x-uQep`RQCzzO30sy1?aby?*UM=D1@*kV4ONON=Jf0AlC zi*74ns2C-thZ31_G?V?whVir=A~Jq}Z@8O*bkp zP{pk;CFIm*@8=mdu{|@qfe-qK8wj(~Yb_TEbl^Ah?hh*xNn`q;nV{R!Z8g#6D_IIE zm04}$3q9L%!ToV;riTgdvfP+BtI6p#qISN@;MC+*V^&5N_VQUdm=2kxLA71kse$pM zEbFWUE!XJw405K9f$HO48qRYakm;{3eIe!!@fdN^V{q}9d|i2!kI$2M!gb^DHK8u= zMG497s`Dc9A#%f-dpJ`wP(=yrY|2sR0!@M&1jK!hqkiOl-%K|thXyrnRDB*&yJJRZ zshLApa39U>VI=w=c@#uqDhjxr3JlZ@WRquaHjvE%`F3TN)tgF{Qw#rW(?XOKqt)u; zAGRfePKM;vN#Ik2vxK{1xi*zcr_?FUi)5%rX0`CoPv z9M4meZ$2`3x)lzn{qMxlhJ%0Sl=}+9x$8u^T<$8uYTR{Q1u1t!US&2DJjp7=>brc{ zRPxIDW`&XDqkp6(AgR$<_EqE8r>zM^=+v@nK?jeN^k|W|-?= zqh=Ws3cu#!nA7-RHZjrZ0F0*O*l}2fl#I8`yoB2Z>8DZuh&CSny7c7IoOIwvLaH-DH)sY=c)h1C9rRWG9M2ml+n>I)UVR} zf|GGtatp`dpZnXp&m*J#Z-kv_X!mTp%gV{BWR575oK6I{fcz;Vzfjy_ynxpJ9B?31 z=Q|&MaN+?&N3$Kw65>l}E>_@jY|?Au`tq`n-}Zomon!v@rcIR>1Jl8(#!i@Cnbwk| z(d5LX#=g8Z4-&Z*HGJhVE)Pm3iC8l;E47jOHgH!E>VvgG4ZtI!y#};>CPDN=gz=!O zCzzx>JQ&{`T!diGUL0n&tmN@r$f3xyW%otEa50u++@3ClSVdGvo;I~7B=2NsAgbK; zhj%q$dLoQ~#>pf=&uAo|^S5n@MnwW^6{~#F-u%n}Ax})PmR<64-rMP!v>MxGjp>i$Q4@g+`QTfSMo{n5#cLHo4HV>XLvBqB7JaN7-;$phR zxvyx-oFeS--Ybgq0J%d=sLcM1z*o~^N$28A9=-X%Vpi2iQtNu=W6sBLN$Swh$E~BA zMn2aw9vTPz_(@MHrBtEz(=kM(} zol~7Ko)L?USs-3}M`bnYnyGS>u|Jvti2zJERo zQ+D+Ld6Ur%IA{ijnbU(YxTDCg5IM)sEaRGXfS`qPn;|y*nb4C9OR>KfG7_V{b1Kp< zEIR3Iq{*7f1Xhwwv+<5xDa2V6&IS8AB_VJ%!LH-gF#^-o&(D4TAh!yVDGP9lf4ObZ zAi^{Pdw-%@Q!4_s28(~<(ntp%GOM@|H#hmA6lHZwN6Y}0&Fp5qv5 zGLI6ym=ZEpATiFy)CDiiq{KNNK$Iq$%xxGHYt6T$Emde+Xa~d0bsa~;Cdh{q(Xk?N z51UEFF7hkCFe1TKWh+){zTR0&O-t5V%SO7QU^cdDV1&12vl6DR9+U)~cr>J0L)TR2 zi~QuN^Wve|535^2b6r(al3q32xKR0ZtaDCLl$lp@kRt+CkBnR(Az>q42}6=Aybycf zGdz>w@1GEgt!GWOTdl*r3zG+>+eQvH6EPWv&yWED3UR!Cu1t?^E|y{^H~4)yr1Jdo zz$Rd#ESrocS#o2@2fhJn2a^brWKOjR66vH;(Fmyo>N39J0ZrSh3(2hMYRIV5$KKXoC;$mqrvdO#8}DKR2pTh zn$%(nQezlKbJ!~LRY3eTVmlA6!Y(al#|Dzxv7R%;-qOl$Omw39;&%M-CL?3xq)$Mf ziqc+0yKHL|n>J$=uFBs*x{19;W`p*w6A$;r8&xevDyj_nC)<>!@AVo0*51AKAYb_sv}Ge?_@HoQx4Dn{*kzbl3WE+Bs}#4WGHDop5cV zhmHieytcJlM7Jq;Z%?vT9NN&yun2&k%=NvSm~S1){$0iLs^?&&N=)qmqwL7QojbZK zM2D{rdJrF)Ifk*=q=goO#CY*Rc*V7-(fE;87?}~_DDviMmlWSD>PR28|sm9S*aUU{O{t3xGvQk z4~OOB@k-_jUZTNME1f~QBwg&scEa+~W$HEUVJi{EuhKL#VP-V~?siW70r9lI^aUsK zOS7+5>`p4OUS_EASU82i*y5>MC3mF1IM#i1n%=HM6tLxEY|kpQb7U&2@Mwm?K-A;y zh^5CxE#|uH0wLp#fZ8`TYIf~iqcco^acjMPR6bEq-)*b?S~<2ey4e4hhz}<6^mE#d z8?JX`AS4GS%5^JdylrY1rE@UXHDg?VMP)>`U$J8V1?Z%4tofjgsi=ESD`FSqpC3g} z@hWs5R>@{Z^wTAFUP(RfpvS>YY}5Go!h{3*1$+dS#4`H5=^Dc!%q7TCEa+lgo3y+h z`o`OSocQd%EoX!BV^%bq&WB0ekFaKS_xG!`Wtq>a`ntmJ<*poiGgv1T$bM9Ppj!vk zIM@aR2bQw)Vc8!ybQ`D_YwtUsZ&Ie%oWT{! zm+Gy|^%eiRV+UeN@Wj?}muo%kvf(*HS9)%|1aEpJ$K+ogCs9C$kA9~)Exxg=4JUY; zHabS&u2%0a{YUNFY@fw~pd7exbpfx)Ly(kB0T^%KE&v*x4|H9R@9}uQcD)C5w(lVC z{Im++8MeS;Kkly#?|yiX2NqlplP|d8 zV@@!`?TR^#eR`{ArEbi4$K4rE8ve>8GIT}pzKEVP=}PK=K!V!S{>M?TfEz5oHx3^j z0|t`%N2e^@P06Ot@F>HL2q!V`>e$k7=o(GwVOm4%l!{!^pHkADrl^#B_gkE`+7QVQ z%_+otKPrI=@9%ntPf6MW5`2s=#FI-E7q(fo9Lz1?iDZi=xHR0QupW@u)70;RTr_$B zTIa-qA=92;qb7vPAT?J4`zCJ~@0@}mWF6u#_6Z+(DxyPs)&IJ|u5TBsKbia&Tn-10 zoSF1$B^Ox3oI~hP9KtbD_iY&2(0)4r&!NGm2KwKwIK$IK%0|8$j*kodU))vL2~MXD z3e+nX#vTG_KP1*vt&M6TO$UhpqHWudpw;;Oen=$Z8H_88m%Z{*Dlk&})7@)tI?>z& z4<{;N?}XfXFxI^bXT{p}I|33+7RFN1w2m|MFt4LtU{UU8DEV!8di7Z{X*-ExZ~%q> zK1PPoW^WB@&TqVYf~z?mSB1DuI2Z<(d8ilLWZ^bKJtj$&GIB#b$`c?>3Pvi#5(T>19#1{GId z2B>t@Hl^WqwAe@W%ajei6-XlsWk_D~0$I^isGjx61iPV)tpEdLcwYj5n9gWFOBI%(gt zLE#%Zu}kfhmfP8Z#ZB5c%ZDS>En=#0~ZV_Cj&BrJHEc6BM4= zTA0_>69pYsX>~%-|Edq-e%*#1t|yCk^RXhr8-4ylztOV#dTlh|W4qZ0n=0anu@rZ` z@Tp=pMXb;fSEr43r96CXg-B>kIGWj>#$Nv3gV2rUcRQF8B{zh8{vsI@a5hBI}G z*nX72ibAtvTtxckCT**uX1pnXM~fXyZz*%2c5)pQ$LP0yoU+45vy`@^d2ZX^4)xDz z@Mb$;8lue1yS?3%t6_ov$F?x}jxal^ zWWkW%to-qVB)PMghO>#GvnjW+qv;0$Ff%f<&@nO5F>BAs$004R>004l5008;`004mK004C`008P>0026e000+ooVrmw00006 zVoOIv0RI600RN!9r;`8x00(qQO+^Ri3kVes1{nNtL;wH_+et)0RA}Dqn){Dk*HyV7$Mf)=nfpBVoU<1{>~qK0Gk5OH*b|3J^OZ&;&75=g{_M5( zUTd$tVO!hU*0#3w|5x%!cVMGB_4Elu6&3M;0ubf>?*L681>wo>{p-&!@TsRybo*EV z4giOMdw~g{2pFIZOauQ5yavPqJn_AgeZT3i)!30K$Uis=?8$$JfXzpA5vT%7Kn(Ei zb9WB;;?pOvH~V%E@LAwtU^h3ds`*cMK=uQV051dosIGr~waUUrrH3N-XK;7>n&qT6nOv*fSlZ4c*<*mXY99f1?Clg2E21*iWwmD44nJR!96|YU=45(_!4j&D6P@%TGG5v zzbwwu3;iCt1vqzOfCuv*Ww3f(%g4FhZr~Sy4}do^%jF|eBu3!vzgi8NYX|To@OfZl zlc2q_xcR+%L%_K_;DP-2W*hComw{uzVc>n>Csw(1WQx=S&ONsb8{jg5OBcFQNCSuEg#@KDT^m>qkl zxodh~XGhJ)T3-|EYid0*Yp{B=0YQMvlgJa3A)>v}56{?p6 z+DRgfJ4J1^%eLMgvbENb)>;+Y=tLT)-GNYKrANfTSpg1s!TP{GmcF4bb2`4ldKfK) zo{kOsc4FMKqZ39;qe4^`^+GT{&I$r(pjwEOAwj)tzM5-ki1UI=2`WQN z!Hj)t)|-6Z*YpZtPk#G}!N4b;J>50&UcviH+k5xxweUZVsdP4Ir8|Y~K_SrL%c7VW8ejZ%P86lE&wO~EzwtsG9BmK|a zc=*m>Z?(rzdZ;s!JRBPhbuH)&FJ3TX7c=}$&LqG!;GcmX0Zn7zU%vGOa^l(3-S;Me zKLWmpO9?FqnVAn3qm#QcSqKMl1WF-?HCqFFQ|0u&WGs2KZ8Yi&-H^)FhE(QP{Mq1= zsF#870MiI~hd=WSz%Jm=fiJKk!!la3g0G2pxdhsm*jK`WgZ20rl$ZrCHBD2nlbLW! zxRHdQUN^(n6nYEa7z^fK`6!<1EWPqn|`_MHLc){0l^7w*pcei_?1-!`Wuo+M) z+G^-(@Or^YM?h#x@kT6Bo%Pp@40s&)vH`vV1cRSfjE= zSB8_r=_gUvPJ~uRq@^UL8N4#l@h%oO*nIbPj&7l zY%0XDNFBFR!%EX@1TRlya>HL|@C8?}f58B!fd69sVbX%Ohazo{xi9!SnjDKO>HT*F z+CmbG)by5UBBW+6YJ~qit|Wtq1$Ys7(cr4|e+T>z1L|3ZEGTJXQqVl!Dn>&cO^)Z# zAD??$$l6AhtjnBMvBci{mA3y@zxmuvGk*;HJ<#?J|Kc;CG=Vp9#qR^KI$JMmVe)G>> zMw=aQ<C`t#p3dm0gym}tlT^zv_v=)#{iq&%5O5V)5H z7XX^xYgn@S~-Ag~y7^FzIZAjD?YtA=kBI%dw>477r6i!7z9 zVS7BrT9veIX;w2+y`Grq>C~HBP&x^2Vfz7aUT}rUUagt*&-s&#WQ)TY-eaM5?7ivb z>OJX4)sBSzf*C2!dpm~a&G_(=8LqTUsn{V1Gx5CcAcS_Oorpi*UJTDfP47~mSY+^& z5Q}87u#}4A2LTGGRcxY?v{mAomHI*|)%ncasAgudp(NdvySpMsxbx%->NXAQ1E2og z<+IvDz+S*wT#Yr*B-@$I)^=qxHASu_5_l~s26a;@)MThoH|0W8N`=@Iq7Gp&o*fFR zfhpbyFO)ly_=0g+!!nVpI45FjO=D+L(uPVBt8`kHR?|weZcV*mrQWjAij_2@r~ci? zOBEMsCV-g@0jq#Pu;@c9S3i2cc(WetYifseDIT$l?VkT*F_MXAGJKQanQLLQbfz@D zr|P#Wiim)s4z})fu6-CToN1VL9m%Yc*>3f-$9lb195hE1x5vK+m^nIit6Q(>(iSb_ zDh2DR->HD0_?lXu)C$T2aTxDOcGu68W+pF23p*0Ye1)v)w=RAVExg?_X}n@tUk_}M z46sjxpRPQ(8nkgA$GOEFpLDqaLCQ8vf2qFz+MZ;n`Fd%(^mcLf+NE&mQYJRty29VG z##9O5)`Vpc@EER&xs@k~BEWs|=&Kg};qznfm5uV4xVG5BwiLH``I`YQvbA+8gFCl< z$)Z0ROA8CRNq+`B27DTKXQX%T-mV0Fcfa}pI z<{L0ya@PrW;*yDba95m4dEa}B7Aejv-voXFbowp(YledST2TS`E!?iD*ay1nZcW^+ zfj4s4>zf4~q@z<1XApY$xcNPbJFM?6teqTo2B-tN)=j{Hv9ISY)q}W`iG6t^7PsQg z;jH*RaMf37rJ`&V=*OXTgZ*27_!KkkLigE1J^-4JY6nnnB;||We&Lg5+19qUwXJRa z-0S}UZIcb;sY!x$0000bbVXQnWMOn=I%9HWVRU5xGB7eTEio}IFgR2)F*-3cIxsdX zFfckWFk?TsO8@`>C3HntbYx+4WjbwdWNBu305UK#Gc7SOEigD#GBG+aG&(RgD=;uR zFfbBg0QUd@09SfcSaechcOY6Cgx@G{a;ABePT>%h=S&#LUDT#0SfONT5nC0O}VJbn-$q Ql>h($07*qoM6N<$f}C1hzW@LL literal 0 HcmV?d00001 diff --git a/pkgdown/favicon/apple-touch-icon-76x76.png b/pkgdown/favicon/apple-touch-icon-76x76.png new file mode 100644 index 0000000000000000000000000000000000000000..d1633bc024eb5f8de15de4288d08fabdea9cb8c0 GIT binary patch literal 4390 zcmZ`+WmFVg*B!b=8Yz*K7-47_I;4>f$svavq&q}Z-~pvY!gBbQl5G00{&H(}G z8R_QZ{q?Q)$Gh%2cdxt8+UNYb=d7JzXrM_&&O#0V0I0OJ)Q#^^?>|9Ge1G?48wuS5 zp__`n3II^|f#MoMbpPgb(lXWu0K)hHfS5M`z~wy^vjqSIiUI&T4gi4MCjfxSH}9K~ z{C$GhQCCwPaQB}BZ!b%~XGj9H^)*QTP>?b_AiYz!Q3C*|`?S?nOhe}P^Q^pVW=!(x!<+S&Y3FLOtmLz)CZ2tp zv4Vy@OF$yIR`4`EbjESw??bywG?;K_os3W&f*-<zisHb?NCwUvn(?U1uqOaDHWxR!9p<+%-=rP-K;Ql|o3jP;bDlW+)N zQW>85AXI=r7!Xa7$Ak%a480vzl<^~<#8V)SApGQSoqM`eb8JYx)#JOb1Voik@$yL<*8lE`Ksja7vZ5fRANYCKK$nBih8^C_KRXI zSbY77gVE|A6k78IsOIA(zDNpg))j6zTXaAiB}!0Y9n<)zv8Xi{ciE2;V-PFSv6evW zCUA@#mBwh&+Y#7lW$0=eq%y~5U&VlLW?_XD0HSz%V#UO8N+eY~yK$b$q5|n`X{6;Q z`(Tkl8Nrzh!DzoIE2ZYtx3RI}WA6 z`nE9p!DoKp*poq5;tmQK1dbLb9R0IgXZP-3GzAKw2SGvaouVzh~6mYbRn`gTsti@Ju#4gHi3TPx4#YNGz{Tferw8YB9JP$?(|INh@@Ae2LK|$Glg%}Ds}iePbem~5 zBy{8_hH^Jt0YuW?;bhD*7t8dsLSO7D?CgK&dwu;rhpxY4b!s%l?T7R>q^B2{^CvS_ zs8;jr#5+WM^g&gOs7lI~upt;7n9msKi&xuIch^66g?F0NZS0TdnBxda`DveS?t zzd>l|1k`d6!yLT$QaYZM5c$XPu!6yUfZ7T3Jjt=ai}8mW`P6zBf_7ykwTUt%z4uef zn>H=K3kGqip!l1RVr{!3grnz)!uIt$Ie+|28&txS*MgVTwSl^Qm>oWd$4EG8&{G%O=1{Ut zjSNZ)#7$KN;)=9tX|)$}&mw@c%6-K8le}FpNj(<>PeuLQ8Wb}Wj&(>Lv_36B7Sr}JpMe9wx+5#%hILTd$Pf=vHlTH zG1i)$L#)W1>{ncss#pWzGF6W}sb2*{sC6YGSYe<4A}e@~=}pR;rc!zmR~{+v#w0dH zNDQ^8A3>(-wJ-3Cyyb3(6AEorr8C-pHLa^v0@Pj2QT>tILRDeG;4dWGamKpn*w^2M zz<3JWVvIj6F99+QoA%liao&7SvWWfL{-kFGFV!nl9FB1hs!hOtJej2MSLUMUXtrIDn%+AX`&PVl-7!9eD=E$ z^v#`Q`NW)#v=Ax}FRF*p+M`xQ+tY*y4B=ZlYmNZ6SHZ2c=D7lGS;yY=hgVG;N6U8* zfq^&z0(l}E=X9b}6+sUt3uPPgj0m|*g8%}_F?uf^;}##ut+zof*A2f@&yLne z^`%;Qeg{+eK03Epte+pN(?%GT!>a}gTAmh@(2`V%iYhe?*GkZq^VeIY*cm41ai4#& zEw`e43$4fSt>57f1<^!5K6hK>LZO71EA1zKa&-AHGh{l3S%{72xQD)sK*M{vk}aVp zP9Hz-Lb%Vz-_V|U)^t_n-Eia70NBMGei`|rR<1m>7Kf|yr2W{YdhB#{ZY0OnC#ps- zZrJ#OBS|<#EmWN=xl5D0+=?ujF&%ZVyGS_Ag*zjCfpPw)bsK~W;D~W-)eU(+8A>Y4i<6gR3+o_5f14cDz&Sr%2XsqZn(1D0 zHF8UHrHoON3VOfL9w;HPp2HuJ$4O$6P(+mS7r}kuBeUxKioq;|aej|++BRaZCiuzv z_9qnH1~^z5))>`Ufpoi$E}DZ+%wPZPsGaAMTzVP*;r*sp(@`Ch|Mty_*+kjHXYef) z{Yfb%kBNi+m>ap*jSbn1weP9<~Uudo9ufeZj#cbr#i%<*q zMO?0vjXgm9`2Ec$e@oa_cBpOcbNOb?rHRwb_jWp6*YERbs-P^+t?Ik>M}>})Gk?!5 z>7jGaNR-ee6N`SGj1CTEd$%aV=;(-b zLsbTwj=c5w1}@&FPxzm_2RrX2zz@t@09p;S;$MsF5(%73>hcB;jUBVHOsw1y>g}s) zEfJ8LlIQdUyVnm^kq9e2IEWV zkANoqfjpE>ui(gUTNJ_z-gwm`jVOH^vv8dZ zK!m=R#;#H3%iN620}!JV?O)!mGe@%T>iECh6M{iAhywH|*7Gw@CnTyY%~m0^7{n@J zXLHQzuazhGs*tu!$3q3_lHacUHn!2|wUBAl^QA2Kc#O;7@4~y(Mw-Hg=aOTVYI2Oc zLCOx%w9^Os|JW*7fB%N{sC_wg@{$k_n(KU#?7LAm^Lq1|Dmz>}&pLBW%IP2fbdMfL z=`PZDH=_l}*zZvI-cnt-f4Ozy+IJt*>ogn*TlaprP`UCo^zYu+i``MXz}n_D$s&9q zjH_&}2eX4oD~+?Y+eD&{LHn`7aQOxG4ECb8qctM13)XS01G&{3PTC3fRrT=YXl|CR zvXGACi9SSrSq)pn)4cl0Rp}kKNx96FJ<4Pr$A1^Omg&lMO0ZT6Re0z1m9RzM?bqI& zek`!pGtw@8e+e_@3B{Lp8m|c#Y`Bu+tzO^h1aRPS{M;vQGr>qriGC;^Jym?LRFM;E zlXmbpW=`R&Y{BiWoU%S9yp`vVe``HYL$Z`Wtzhl;VlOtjAVycvXNFyJ?j)KrVu3W< z47SLe>1{B@UDnEn%jmYs>tp}ft8eQB%a(JF-F1Csxq{5-g*?WOie^=|!bHFFBzKFo zG*Z0Ggqc|!&1t`Y>#KtMNmZ~*>f^gLU6@>-+~&DJj$2UHF$`|s9JawlYvG(bh3NxIO z-FaS(b$-sqjQ4V8jMReQjuI!|%|-qpYJ;_fu;OdMTMf;-Pe~|^z>R;Az8uo|H+Ca{ zApR?qa0$Hz?`QBTD1Xm9%u}dEIpF% zm*j6EP9?9u-@AO?01e9kXGnmHoRgo+JpjZ+#l(Ri5KvUcR172sl8_UX5fT-Z6BQK) z{9ybqfj7+A-8J-o3sh~xEA9mrVWyS=#*ko6Uq6_uyO#@RK&Y<^r@L=}695pB_xq5N z^p*M3r-LuhL)sh000M4`XHFD!+;qC>^aOOA>M;^OoC&xaOgYiBll?tIJ#0NB=ol@y b&NILc6HUOOs$}MU3xKwUfqJ#7!|VS8f%IV` literal 0 HcmV?d00001 diff --git a/pkgdown/favicon/apple-touch-icon.png b/pkgdown/favicon/apple-touch-icon.png new file mode 100644 index 0000000000000000000000000000000000000000..d384cce8c98673c868652fb1bd0226258184d895 GIT binary patch literal 10849 zcmbVyRahKNu=e5}AXoxyAV_d`cL?t8Zo%E%Ew~fh-F0yY?kqtUcX$8!{=0K=ZcfiL zQ!_nNU2k>ORMkj?f}8{zGCnc@06>%cDXR2A8~1nz=aV2I5GeLxYGatJo~J6Mc$7S@J7-SqJa1R zmfWtA#E%{XXGvKxgnbl5oKI+R^rhnf02Q>PsE~^L@@cl^Zym79hTlG@sH4QojFF|s zAGTF`pih-VY-D*{ES4@L_jA6jVVh=lz zC^nA;Vh<+(W+EvgKC5GpAy$*z*_chwt*o4lgssGU^$)VyyOn{t_EnAfHtxJ}{{-^; z<961zpMwJTe`^&`nul1FUu1F~Zu$WlIOvEtQi1^Y@xvc5M!7Kf+BiHrn*29KH8o2f zc!Bf=;5!>RBq_v7s70vbjyNO(>@R&0?GzS+NZin70f{|A5LrlWsSQbAP@JEQrhxSK z1J%EYD*{5&KD>(hhN!LITH#tTDYguWVaGdL4m~mwJ*IFiU?lwa{fWMArcpmZws}%_ z8*l>k$a(8tl}$wbVSC7Gk(QuWgBwN-?~6ts2fe5eop4_`FWh?E1kG{c;`rXcKCf9J zxiyZic79)aD4C&t!JR?d4n-{0Iz#D)=PdXlxf8k*K=Sytj!iLdvb$lnO*t3))Qga# zAusj#3cZ>LHP2V5KkY%9C>GE$XHMF|qAaYXy;1L0T$693+u8#6l7W;YXo1KV^pI~5 zb_2TVKhIQKA&i{wPx3fxc8t6a59-g3xgeycyD6b^V&U9Tva>~3KHJ}lgE46naFco9 zL%skv`=B^Wi0C~NIU`M|D9+uhZ#Vx$a&PtNiXdh%Qx-W0-8_bdwmW>Ph~h{9dYwMv zYLj7Kz+hZ=y5;qU$u~sUBY1qb?O@)4EIXGrAk986joc0$1ltzF5rdPS(fWJE)5S^m zL{~R_g6>L70M>XDCuTQ#YiM%a5MCd^%RWEnH0(IOBDduY_d$?W2gdu0I|u#zT#a0| z06;Q;+aCLY$zCwHYzTGaKx^sQ?#ccJ$Lzz>h+9;iz>Du$)=a%w=iK3x7kEV3ou|j| z=MU05z}LUB)0u97GlEtecZ@IQk33Bj_EFbuanN=EyGX_&DJ%a}f$nkhWR`2%E3_U7 zd*N@!wUw8T7&rSf*8rcOo&#E3*ZUyhUnfC-`&)U2e;6zy z+X0@NPayWl4v3wsJzK8TAwqS>*sqiqLOKJ&Uvh=jIa-qt-pww{wK6ok8>0kRm^DH`dat(;v|W)Fez77!ZQ}*-Y{gg+4M?dZt0t zwUzx$pc*ljRcm@F&BQ|M^e{9Bk~2rGfcp!*N3PrufM)L1TV&go8x<$2p< znx)aTqMR7`2Uge4td8fL6juwiTjxXfjB&4vz1|1YiNKq-U)5>4R(#}_R#ZK{v{oB3 zt&J3l)p(|IDBIFo1zfKpxD%oia#Y4&Qm-xS`-k#3|ELcu^zXj7S95S3q2g}fel zpX|ftbnogbp_4A%BkcQKf83u67Yb+a-+HSs0v4#u>|L3_Qy@+vEx~M4`YPwRN({bt zS2X+Xz7*oUFzU`uQ-pZvn(>CEjkh+5zm*OhkOd$+b{{vMZQI`%jM{667 zyUN=5y8DxIcW6m*@X@fn(K`3}Eu3xd5CC!6BcY4zIgpFi*5B<<$RDqVW&tmQMbSS@ ze@h%-4*CFD6V8>E)LiQac&8#1Op7QMAJ9>Wk?m!bx_oT7Zw}nT&>vb_z`zv@bPg{X zOl1GF@(TrT=NaEV9?iyFK0KeCqE>Np1qJ;WKM`Kfxu?SRf6B`I_^fG{DZtDw=n$pKyPP`{hI)u=zR z1zCD`jpJ2@U0kQjb!JgIrkMJ(g_eC3z1+^)g;LBbYxQbRIfx&bJ6>&wqXoRqk)mds z(B_F}?l5Wx2I=$I&nY*F5SVpwVwg0oufbC69d;7vS$fm{ z1Mw-;48iFzLdgzz)})mDkbkN=0-lZ-bBJ0Wg{mI5bge~f%M~S^s|+R3HnuWDQ)qXNOj@Kg*x(F(RncN+l9y# z4<}W`do0p*in$;c=Qwo(7_$LluUfa#>1rKoXNJ~Lbo#&@i%^vpiMADAnN$fqM%-El zeuu*MsCm+B49Q82c0yENu)xtlQ4x}8uy>a>-^#gc^llJ}$yHO(J=sp$sPw71yOU)= z1#03+TcM7>^EkS2kwJIT*_RUqY9^qs@#Q5RE$*!KBp)LmaS2RS%$tZ_M31Ai zF5S)?WCy@+12Gk4?!TMo2p#adh&1QW}f`bEN4ok-DhM^gp-$;`p(hY~0G57s~#Lmy}P)EwC5VyUr?XkN4+jrXHQq zjukugsy+xT*TB%W(Mz<@#8MFsCEMywMx=PC|8p0o37D}OoZHN(>n9bjAQh)!`%d`z zRyq*R-(8o6+wO};LQRCP$ zC1hac_XnEOUrsUE>A;4$2}Q4V-MZ6(VH|{!5nW83qNP;8*|`YS+PYAROiS}xS|0-> zl6CL)_PE(NdPZub!8+6jNOXO25Il2)KcjTIwWaGkoK~HNU@-fW3IE96)A~Ql#Dr$3 z<4xg}=N?&Sq_7>$H_qrJ%7Vem%C~jD+BF+uIEhC*&ozVLIC`+4-+{HS%TA^vxXUiE zU(!Z&G)vHF^UBs$KNsxw%+(og$jP|7{>q+NZne-jMb_Bs>a@1G?g;sMAywWFQktxq zA^)x+x#N7{NX67(ChKj!b8Jt4t2x= z$pQDwRX8Md4@qOncajLkSJP!_F5n9UR8XO{z9OV$mBDtomR+Ac?&m%FkEG>G4Y|ld z3Wo~cnXKH|{$i!k$|;-c-i?W?x0R1;CunH05k|HiC42<}A5?SS5$1UPvcJ&g1=|Ab z3xN;27Uk7KtG~7O>97r&*=!P*W@B7q(0UK6qPd{*kRmH0285{bCKzfxdvps~V%`=8%e9zzhGou)epyx(gCqHFpM=lnKV#4CvI zPuq`UQY2PUSFbug*4Ku;swCp0w{i{+P+qMBaiqW9%jIlH*N%MWX{u`w%2;Pq zo0iRXE;qqh&cOpiY_;=n&e+118EtfC@K?ze@&1uw4Gk`@*`~>1+y`a1qd<7MC4C8Z z%<$BsNLnq+75)wsHtK}*7|!nkJ*(pE$ih}~@VB#*)+}5=)X35rTc&#j*tpG|MGLYn z8%4B z$xFc))}3j{X|s>W>K3EEfw z)r6z@&&!Oht8uzLsfw{%R6Oqoc~PLtd|b~Jb2p*iB#SGsFVxi-o5BtL`=>Jc;KKCz zu6aTRhL&V@NV2L#XRn5LtuW#T;w2fcJYYl4r(5Zn1Ps;;vxioC8=Q`Z%i1xj%m zwF{dqU}@lKM}Ei1BWY0l6fY&B+VT(fk{3Y<7JcisNbZy?8R>EgCw7glD#5e9$EBKI z?fZ4^?+qwXPe2LZn+EyMmZJ9)-Tv3<3>{|!edZ~W1vgbqK)a2ff2tajRIPiDR+zUx zD|!Q)w=cW09bIMV8zONNv)GO8$YHbjvz^@>usbl`zN85m?sFwFoqs8uz`8c~_xmYl z^XH?c>6L}JXF9t4!HVvULN z+*zttqOOSjJ6lm+{W*o+N~}HC2_@Uw7dKJ#2LwtYs8bI- z=dpLh*1{LRzWQG-m0y69d%UHegQu2N~ zjv~*<>3NrAQhY@J=rYLde&WH^Ev7YxGw@tP`UE{huhjRpG z-}fW-es~W({;cuC3U0|lPI0FK8XS+uK2AFl1|$|U>}03e&QV+YqIdr zU&0HNq*t8>DIu#`S9XT9B4Zk{?8=77t1G4RNzX?X(v^LiqR-mzT&1-2eBlmq#Ip^# zLq0PzMLG$!pnU8}QfRkePlM{-F`5t)EwDq3=h~G0m~$4(Y@}niLgt0|QQ9EWpV%5v zjxA7UX2g!c$&T$sijrqJ4!2gg#p+IHM|uv-l!!Lxr2;^H{pQb-!b=uXw`4)JTHDzV zJ-VT9>Zo0)s3M15EJY`44f|eV2k-%6qQ+k>J?s$Xdi81yM~CU&i0#6B5_?T&(Ef7B zVWKtR`nglr5@_$|q+~AA!w1m#%fZ!9u=JwzI+{q)-w_5d!35!6IVRFKJrtDi@?hSI zu4N<9eq1L=;%rox?Rv4Qv9iI!+mo8hnj1tt*plc#_xP8pph-nx4hW0O5-=tMFG)KK zP$Xxsf|N!4_42V06tnTNZh^lzNn-3M)0O6TFaITtBkz)2jiAGY{maB;v7IEV_1{1W z7`2-xKBHjMe?;6rB@dk-e19ZkN8HcGyMsbZ!MJTkqcFbhRDAEd2{Vwr@4RxNFh`o+ zC6q0+dfUu0UpJG_Wh($2pcL6YU z8EdyWN?KZGX*^5%QHRHySgS~H;Giq)>o%DPV@I&I{=A{N(j%~al#i=C$XX?|fpEc` z^xBNDE9}|*%-qfsC+&Ji;V&2gu8{=8z4i$&W2P@iD_Q=qd4H_?yP>aZC}Qm263{g? zp0?fEZUpl=N?c?sFS2>jQH`OEB-lEkgU8mZA+S@iHvVsA$j$8f_fpZ~uP+89Y{P1f zzbyjJBNr@szJ`6Iow1iX&{I7M)5Vi54boHbfF@?p-jpc3WDMZEmU z2n~yDKMZ%K-~mEHD>wmsKCDe`{?7&!)DoD3Cog?LOJr~l1gp(WG^Ryt^X{ejRY>$t zZ0g=s|1KZawfK!k6NP`;=yU#Cx~ z76Il~7g|`SgAtzpz!9pAO+V%>(#DQ7T5A5fzkpo7AB_zQ<44Tf*!l+k9+6o>1ql>? zzGdOJ@wbG?E3Av|v3mdDFnVv$H@W8PhV6@UAi8i(q1s=fpj&Wu4HCk0;guiLs>jka zlJiPuMWC7%sHHRklHQI#=5mpk6n{hX#a%*s)mR|m40_)1?-rS)M{rfEpL#O~{rxW< z1%unWmxVXPs+8u2o_@Q#Q&o*ewCjaGk=*n-r4r4ws#KboaicmjUpdR>7!w9zUH>t7 z(C*36UO5B)v({BKN34N>x-uQep`RQCzzO30sy1?aby?*UM=D1@*kV4ONON=Jf0AlC zi*74ns2C-thZ31_G?V?whVir=A~Jq}Z@8O*bkp zP{pk;CFIm*@8=mdu{|@qfe-qK8wj(~Yb_TEbl^Ah?hh*xNn`q;nV{R!Z8g#6D_IIE zm04}$3q9L%!ToV;riTgdvfP+BtI6p#qISN@;MC+*V^&5N_VQUdm=2kxLA71kse$pM zEbFWUE!XJw405K9f$HO48qRYakm;{3eIe!!@fdN^V{q}9d|i2!kI$2M!gb^DHK8u= zMG497s`Dc9A#%f-dpJ`wP(=yrY|2sR0!@M&1jK!hqkiOl-%K|thXyrnRDB*&yJJRZ zshLApa39U>VI=w=c@#uqDhjxr3JlZ@WRquaHjvE%`F3TN)tgF{Qw#rW(?XOKqt)u; zAGRfePKM;vN#Ik2vxK{1xi*zcr_?FUi)5%rX0`CoPv z9M4meZ$2`3x)lzn{qMxlhJ%0Sl=}+9x$8u^T<$8uYTR{Q1u1t!US&2DJjp7=>brc{ zRPxIDW`&XDqkp6(AgR$<_EqE8r>zM^=+v@nK?jeN^k|W|-?= zqh=Ws3cu#!nA7-RHZjrZ0F0*O*l}2fl#I8`yoB2Z>8DZuh&CSny7c7IoOIwvLaH-DH)sY=c)h1C9rRWG9M2ml+n>I)UVR} zf|GGtatp`dpZnXp&m*J#Z-kv_X!mTp%gV{BWR575oK6I{fcz;Vzfjy_ynxpJ9B?31 z=Q|&MaN+?&N3$Kw65>l}E>_@jY|?Au`tq`n-}Zomon!v@rcIR>1Jl8(#!i@Cnbwk| z(d5LX#=g8Z4-&Z*HGJhVE)Pm3iC8l;E47jOHgH!E>VvgG4ZtI!y#};>CPDN=gz=!O zCzzx>JQ&{`T!diGUL0n&tmN@r$f3xyW%otEa50u++@3ClSVdGvo;I~7B=2NsAgbK; zhj%q$dLoQ~#>pf=&uAo|^S5n@MnwW^6{~#F-u%n}Ax})PmR<64-rMP!v>MxGjp>i$Q4@g+`QTfSMo{n5#cLHo4HV>XLvBqB7JaN7-;$phR zxvyx-oFeS--Ybgq0J%d=sLcM1z*o~^N$28A9=-X%Vpi2iQtNu=W6sBLN$Swh$E~BA zMn2aw9vTPz_(@MHrBtEz(=kM(} zol~7Ko)L?USs-3}M`bnYnyGS>u|Jvti2zJERo zQ+D+Ld6Ur%IA{ijnbU(YxTDCg5IM)sEaRGXfS`qPn;|y*nb4C9OR>KfG7_V{b1Kp< zEIR3Iq{*7f1Xhwwv+<5xDa2V6&IS8AB_VJ%!LH-gF#^-o&(D4TAh!yVDGP9lf4ObZ zAi^{Pdw-%@Q!4_s28(~<(ntp%GOM@|H#hmA6lHZwN6Y}0&Fp5qv5 zGLI6ym=ZEpATiFy)CDiiq{KNNK$Iq$%xxGHYt6T$Emde+Xa~d0bsa~;Cdh{q(Xk?N z51UEFF7hkCFe1TKWh+){zTR0&O-t5V%SO7QU^cdDV1&12vl6DR9+U)~cr>J0L)TR2 zi~QuN^Wve|535^2b6r(al3q32xKR0ZtaDCLl$lp@kRt+CkBnR(Az>q42}6=Aybycf zGdz>w@1GEgt!GWOTdl*r3zG+>+eQvH6EPWv&yWED3UR!Cu1t?^E|y{^H~4)yr1Jdo zz$Rd#ESrocS#o2@2fhJn2a^brWKOjR66vH;(Fmyo>N39J0ZrSh3(2hMYRIV5$KKXoC;$mqrvdO#8}DKR2pTh zn$%(nQezlKbJ!~LRY3eTVmlA6!Y(al#|Dzxv7R%;-qOl$Omw39;&%M-CL?3xq)$Mf ziqc+0yKHL|n>J$=uFBs*x{19;W`p*w6A$;r8&xevDyj_nC)<>!@AVo0*51AKAYb_sv}Ge?_@HoQx4Dn{*kzbl3WE+Bs}#4WGHDop5cV zhmHieytcJlM7Jq;Z%?vT9NN&yun2&k%=NvSm~S1){$0iLs^?&&N=)qmqwL7QojbZK zM2D{rdJrF)Ifk*=q=goO#CY*Rc*V7-(fE;87?}~_DDviMmlWSD>PR28|sm9S*aUU{O{t3xGvQk z4~OOB@k-_jUZTNME1f~QBwg&scEa+~W$HEUVJi{EuhKL#VP-V~?siW70r9lI^aUsK zOS7+5>`p4OUS_EASU82i*y5>MC3mF1IM#i1n%=HM6tLxEY|kpQb7U&2@Mwm?K-A;y zh^5CxE#|uH0wLp#fZ8`TYIf~iqcco^acjMPR6bEq-)*b?S~<2ey4e4hhz}<6^mE#d z8?JX`AS4GS%5^JdylrY1rE@UXHDg?VMP)>`U$J8V1?Z%4tofjgsi=ESD`FSqpC3g} z@hWs5R>@{Z^wTAFUP(RfpvS>YY}5Go!h{3*1$+dS#4`H5=^Dc!%q7TCEa+lgo3y+h z`o`OSocQd%EoX!BV^%bq&WB0ekFaKS_xG!`Wtq>a`ntmJ<*poiGgv1T$bM9Ppj!vk zIM@aR2bQw)Vc8!ybQ`D_YwtUsZ&Ie%oWT{! zm+Gy|^%eiRV+UeN@Wj?}muo%kvf(*HS9)%|1aEpJ$K+ogCs9C$kA9~)Exxg=4JUY; zHabS&u2%0a{YUNFY@fw~pd7exbpfx)Ly(kB0T^%KE&v*x4|H9R@9}uQcD)C5w(lVC z{Im++8MeS;Kkly#?|yiX2NqlplP|d8 zV@@!`?TR^#eR`{ArEbi4$K4rE8ve>8GIT}pzKEVP=}PK=K!V!S{>M?TfEz5oHx3^j z0|t`%N2e^@P06Ot@F>HL2q!V`>e$k7=o(GwVOm4%l!{!^pHkADrl^#B_gkE`+7QVQ z%_+otKPrI=@9%ntPf6MW5`2s=#FI-E7q(fo9Lz1?iDZi=xHR0QupW@u)70;RTr_$B zTIa-qA=92;qb7vPAT?J4`zCJ~@0@}mWF6u#_6Z+(DxyPs)&IJ|u5TBsKbia&Tn-10 zoSF1$B^Ox3oI~hP9KtbD_iY&2(0)4r&!NGm2KwKwIK$IK%0|8$j*kodU))vL2~MXD z3e+nX#vTG_KP1*vt&M6TO$UhpqHWudpw;;Oen=$Z8H_88m%Z{*Dlk&})7@)tI?>z& z4<{;N?}XfXFxI^bXT{p}I|33+7RFN1w2m|MFt4LtU{UU8DEV!8di7Z{X*-ExZ~%q> zK1PPoW^WB@&TqVYf~z?mSB1DuI2Z<(d8ilLWZ^bKJtj$&GIB#b$`c?>3Pvi#5(T>19#1{GId z2B>t@Hl^WqwAe@W%ajei6-XlsWk_D~0$I^isGjx61iPV)tpEdLcwYj5n9gWFOBI%(gt zLE#%Zu}kfhmfP8Z#ZB5c%ZDS>En=#0~ZV_Cj&BrJHEc6BM4= zTA0_>69pYsX>~%-|Edq-e%*#1t|yCk^RXhr8-4ylztOV#dTlh|W4qZ0n=0anu@rZ` z@Tp=pMXb;fSEr43r96CXg-B>kIGWj>#$Nv3gV2rUcRQF8B{zh8{vsI@a5hBI}G z*nX72ibAtvTtxckCT**uX1pnXM~fXyZz*%2c5)pQ$LP0yoU+45vy`@^d2ZX^4)xDz z@Mb$;8lue1yS?3%t6_ov$F?x}jxal^ zWWkW%to-qVB)PMghO>#GvnjW+qv;0$Ff%f<&@nO5F>BAs$14Ba#1H&(%P{RubhEf9thF1v;3|2E37{m+a>6WiAr-I$WF+$Z^fPx2o7%qsKTAj{k!r|du% zFmlcg@W_mhN|~IHxGE@Vhc`&selR-VnX=I)JKZ5Cz$H7tGb=VYWwtLuc0Z6EoHEBV zGtMSE%`;^KPzz9RaMBK-IZ??|JTnsjZtFhoQOT=xI}|J##)gHp@Fk{~}2jR1^HOoDQJV!ok{ z@~n!nZBxqJ8|rFHLp1#Q=FgiqQ{ADawz+%e&Vw6PES}#Zp*?fQiK{O@et7lZ`q_yh zM!hF*-+TM%;M7lQ(p%n6ZO}d;gLtTYA0;(fK~EmZnUOawcQXF_S669w?3g{Z)NAk?n+w2bLLjmEU8y_ZTHsx?c(|*JVRoJy5B6D)K^l*o#2#tCqM%l%yncptHiBg>H|v_poZ|Okcg6? z#Bzm#qWrYXoK%I9%7Rpd%z~0+28N0`k3aEn6ozSNobo??#`9?q1G6%>UNX0^vat7L z5oTcpmj;u=Da^{7LljQmxN_pinIkes*iScjEb!7}cqJ}a^2y0`D$oiBPgg&ebxsLQ E0DL{+UjP6A literal 0 HcmV?d00001 diff --git a/pkgdown/favicon/favicon-32x32.png b/pkgdown/favicon/favicon-32x32.png new file mode 100644 index 0000000000000000000000000000000000000000..011e209deb2600c34067e09802c56061c8c7cdbd GIT binary patch literal 1798 zcmV+h2l@DkP)004R>004l5008;`004mK004C`008P>0026e000+ooVrmw00006 zVoOIv0RI600RN!9r;`8x00(qQO+^Ri3kVes76nUTAOHXbr%6OXR9M69md|foR~5%U z=f0owW<0Sywn;;iHjb3wMHA5gr9efYl@L{g5bCB#K-w(2Mpbq!Sex`*lwj?>QdFc0wy8gw!M5H>3CNIp=%NJ@fUwQHvt-7T%Gy%{8W&s1Vf%kyo%fEhcD}X~M0dRqj1G}sLBcM}_5uoRv z-#Kv=Zyxw0a2VJFMAiEt@F(C8K)w#bok;EgUjRM>1eKEoGN22r0LzC?vI^t^EI(f* zR;X6{2=H(PLMXpzg>f4ADsZ718vrU>06tYEpLFzqk5yGUQUN)wu=4$k``0I&jM zpMEXjUji+oQQ+G78Q0FuJKCHT4V$7~C>YOInF^Gtfndr-Dr-J-Z+r25GueT0BfmFw z)aJgXXJaV`7odR&55IlbN;(V z>j#Y1nqWMTs+*NSsr#yRw&f2+@U)qIB5h`eQo+5|Ob$g6*-qq?D)aFy*m6XMk@3k5*+=M|b7*?6V!wumHtXLSOYiSovlfx+`ASI}=(-8B5g3 z@MAW}4NB7bwgJAlIk?3y_Qz3vwCkwj1gcZA^U=I3h_?67Vkmxh8~g%1V%U~=ghqCF z*vbyN25~-2@^UHUH@u7az&lglp5L$r`14i}N~Gi2xpc{C*OPmtyUd-QmD%AM+(A1kb?gUjA^t}sS&pj7dNo5BRnurWi zVN`t=P*sXlC0;8`?=8Q$|A%kP|KjxHEC1lPKR&t<<cv#6l^N=QqjIjkt$hiWz;LA*U3%0U6}To zm2swU3gM3c7k)5t84XN>foZgWrJBaszWj1`Uw*k;;Fv@LcVr@OB$1y;&AT}c!b`QQ zt?TY^m!c*YJGkbJ&kvlqV@VQ~(b!6ySQ%%a_SQa>yncX^)cUabHiS}8%qXH--T?@z zHI1`c5vOyshX9fa4C0oaf&2;+KI`N?&uAE_En{4ck>fxwEYwp?@p91bXKFWIA*7 z?MuL$lwx!ds8{oES4cuBe0nj>l4nx28XW#e+6Cx)&Rculd~+Hq_Cw{e24NV z9|HDQrS>N(9RsI;Ym(^bC3HntbYx+4WjbwdWNBu3 z05UK#Gc7SOEigD#GBG+aG&(XeD=;uRFfgPDBbfjI09SfcSaechcOY6Cgx@G{a;ABePT> o%h=S&#LUDT#0SfONT5nC0O}VJbn-$ql>h($07*qoM6N<$f?U!f*#H0l literal 0 HcmV?d00001 diff --git a/pkgdown/favicon/favicon.ico b/pkgdown/favicon/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..0e40b221c1cb5322466f511cd18983d29bd40eaa GIT binary patch literal 15086 zcmeI3d014}-N!GIX3^J}m^5#bsE|ehml$wCTo49SRCXNpZ8l(-g+&-l)VL(Zx+hdy zwGdozO-wbe;0Cs)#+s!u8k@MpxI`P%O4@(=Jnd8Z{oa`~Ud9<1)I52g_mSuMc+S0Z z@A-Xy=bn4dIlpt1Bp=CF3J#W#`b%5-Nm8gJN&fzh63gFbV{N-k>Y+`M+EP(Dy7bgXN$Wyaj#( zs=-W<4*G+K@0lOAzy~B-V0ZAp;92l9$8nd=*np-UU>aBp>HxJ1c)Z5JuaQ;)KAUcx zvEjVd7T6UUp9doyV?z)N73gIAD}nGTreZ-wBe9hQv?pdkma?mcOCcbHxJyYe)_?8q&8mNYz@&Y2$HJ$vYnKJJJButpua|7uakBj1Z=bJSdXGkJC(+( zo8F2|Jy16)XZ7}<7Z&g7UaDH)zBJP{h(OQ|32FX;C7%H>%= zH@}~+0_QVOYK>PPs#B-#U6-5o?xv!Q`ghASSMQphRoyrzd(mN|=H+7(in70mFeO}$ zG$mXCmm^Gxm%~koUyrn^jt(gs^7f$e7uWSIAH4e6se|5pykfwDhbx2T`q`aqh81-bk^IJeO=8aVF6^;#6Eo=*KZ7!`~lUI&@!1S;%Jp zsW1HM(aHhyyR`L(ZIAr|W(nT?rKC3Y%T3=_J1uK@!y9?iPtMm~qb-MhkWkreKLG$-XAh|?)yqf3Wvon{&v)|zT+cN_ujy@3U61q}_ll;5GQ(LQcz7I|u~He`L1Up(n% z=d}wzWFOACXjl3nKc9I<)p6+A1dQMr*XMw9T|hXf0S#au*voXgF6&chz9MuprS_$U z%xl@EiFcvh;e*Aq4|qquC^V1$beCarW3ycowXFgpf&46x`hbbx9l+nI+sS@?(xJDt zn)5xUF_kuhhB$gn$LW zqjvnZ_pm;`VY@cx)EkB@*8APJ8KK!FO|gVup?=nVnJN6^eTK;mXg5N$Yf8=Y!47I{ z4?Mp;^#!p<^Gp+N+_N_0qTT+@jlP*|8U1yEdDO{w3nm_DN4wCw9lGCZPdm%lZkV*+ zi1#Q7WutFu>G=EZ(!Sklz5N>2O^#{&jkN`nkFw6%)8+V$+7|)0?T6YM4Y3WCrtpvW zOyy^58GH4I&`<4ruke0d-=WL?jBU09{rr6i2nCyg+w12WRmaemIKK<{PJEeciMW0* zeW3Pq(}b3V1?kuMjR2Q+>(Y)vlV~TdDZlUh293lwh~NHL@Gfxrz;9Mv;e@74OW5b4 z?r(h?!p7bILZv^YW1mNqYANI%ja5Lfbw)JTwd{BA)94yB8?`m{; zSA}*l9%qX2?j_7C&MnWY`ebJI(u2D6Ep_RsyS$J5+X8!Fp1_z9=M3_E=mA^MtezDE z7WTuOX?R)4Hu^KEWccv}Yv`vOujASLSsu^B2h0Kf;co82IZYboG}8-{Z>-0Bf!aBr zSg$EMUz=||vpjFw$MbSt|J~HgRlD=j-`Sj$x_=GlPMkwwE~WNZ*RDU<0tY*EyUsuL@MAxT`V7^z+5qiVx@I z%=`W9?8S$wvR3S~WY+D_rf=Pvp0=w#QGH;In4>aQOqb_PHYCff2bY-kXfJ0PV^;^`z6&dA6Yr zV{g#i4KPM@H?bN%yGp65Urey(^Y6Fz)V*#~|O4X65hHkC`L!hHQb8fw%gFZX52Q?!MqB zu5|<3T|f{h2aHvB)(`LMpN8#mZGJ!LgKIkydifjQ*cLftM3VYeZX^|7vS+j-ozliu>nop0N2#=!E4}GfX_v|>-=t30Do`F`?0Qp zKCU@D3i^UDz-KcS@SS<&$I^A*?)@6LUjsjE4YbOClH_&z5U-S^|HyYp(!Zq$`S{;b z4@c_jNPQeBFW_~OpCiQy-Y-eMj#Qd$Kb9gS>7R7hPm(S&#Y$H*Q_OVdF!iFR5!W^I zdfa!BDasM$LWktE2fMA!_TyIm0Q;@2Eshi?XqtpQUMUE+-0^EZV=*ZtT_WU8l!Im$G0gD0Gc{$El z0?L32JmIxn`<&q!jEe?=P%s2M3vP=qi84_>#|bNda?Hjtk2roE`B>n-CELelv=Ie3 zU*Y(jb6Jj$3HK<319=W2?1~iSa(p7jSju^Z$O|2_a2@xg*q@8tu0P662HYdVaX1n4 zAdyl7fB%`VsZ3E`bO-hoI+|8u?wadoH1xwhhFFwIUulyzggtjD=LZ2wyCM(U9CLCn zl&~YGoX2xMFV^&9ryQxBnA*5zSYGwcUPj%{N6JUPi82`{aKsDbI(KKA=Q!^76E>A8 z>^=z=!j`QC7m2pI_jP0 zrw(4#YwDoto)rTZK2q8LRhPAQ+GgEwZsay*5}fp<4{U0%2cVv|W@Rrq^hVx{6TitX zJ-rUGFRrOCGGu_iufk-Sii7@UqamKbUfmVh%^3yYt6xB!5gXR;mZDVVHX+# zCMbOtdBpX7-ZxBmdcDRt6>In>X-gS@+lUz4>_YWl(h!qooKaz0nQw8dHMLp$PPB%e znou(QXjs{hbs5&M5MfiKh(C4%Dc~)Dwbks?j8QV?%6M8mY)>mnzA?o--kWuJp+T{> zAjgHz&MJ;Pj&-|Q*kFuY-YE?bj#lpI#Jyv2%Va?K2fcz>>WY- zxx7o4aYAWBk!Ks|395Jw8u#r)8^PNC$u{dVmhj84->LOU+FpmY%QlR*e4$aF+yooW zHjezuK<-bG{trk4Yv=l7BG(A$gYT?`%Qge#n7^lt*X*t2b609_{NH7!F^1QcR%{? zIat46P>^wLgCh1{o%hm5b8{9nnKSBkq^0iOh<+va=Ly$6*p~O_vTqRmC&vvlxZg>{ z^rt3UU`5tjt`w$DFZdyNKO$f0nb)w68ZT zK>y}`3}ugvvTyX%Z_0W^AKSGo{Fmr^_YTQlwmT+m-&*#`3R|9H?~I8JZd2#tfMWuC z?9h{aah_w0X&7@PV(fwafK56eVr^~44Y1{U%&}FpFxe)x1wM^(IX)BhFYGbpIojnI zClxRrE5|ia-h*H?conex_Qr4R+23WGv3Cz1IO?-@CM+s;h2v3IN(;l zqL_P< z{6LS3h4O|M9zJ$)01Jf|EIa;EYrJ zfGrg5kn2vp0M`oK&I2KZ>n&U}YV*$F`o{p*-F^w?fh^z$?AO`f-`==yEZ{l^=Vi|S zTY&30pXd8n$2ktb+HUjGN$RH9?j<@g)(Agt9}M_$0#;q1%B5yubeV;5sw^&;+H3$X7keqXI&%4d1)>@Q8i zUgSdan9mgBb>6!UG7AVZ)`lkT*0k98M0roX=3g+P=$-J?1G%*AN3e@-M`1djRw%a};kV^E}rAzIqL@(rb*D+W9ZYa%C$3 literal 0 HcmV?d00001 diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 3ff9df9..79ca08e 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -37,7 +37,6 @@ use webgestalt_lib::{ /// @author John Elizarraras /// @name fill_input_data_frame /// @keywords internal -/// @export #[extendr] pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { let genes_vec = genes.as_string_vector().unwrap(); @@ -66,7 +65,6 @@ pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { /// Run ORA using Rust library /// @name ora_rust -/// @export #[extendr] fn ora_rust(sets: Robj, parts: Robj, interest: Robj, reference: Robj) -> List { let config: ORAConfig = ORAConfig { From bc7dc4699d78a7f86e7798e7b8d9b4a65b011869 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 9 Nov 2023 13:24:46 -0600 Subject: [PATCH 69/82] fix windows and add better function comments --- .github/workflows/check-standard.yaml | 19 +++- NAMESPACE | 3 - R/WebGestaltR.R | 138 ++++++++++++++------------ R/WebGestaltRMultiOmics.R | 25 ++--- R/extendr-wrappers.R | 30 +++++- man/WebGestaltR.Rd | 49 +++++---- man/WebGestaltRMultiOmics.Rd | 45 ++++----- man/gsea_rust.Rd | 19 ++++ man/ora_rust.Rd | 16 +++ man/rust_multiomics_ora.Rd | 29 ++++++ src/Makevars.win | 31 +++--- src/rust/src/lib.rs | 41 +++++++- 12 files changed, 289 insertions(+), 156 deletions(-) create mode 100644 man/rust_multiomics_ora.Rd diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/check-standard.yaml index a3ac618..092ed4d 100644 --- a/.github/workflows/check-standard.yaml +++ b/.github/workflows/check-standard.yaml @@ -18,17 +18,26 @@ jobs: fail-fast: false matrix: config: - - {os: macos-latest, r: 'release'} - - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} + - { os: macos-latest, r: "release" } + - { os: windows-latest, r: "release" } + - { os: ubuntu-latest, r: "devel", http-user-agent: "release" } + - { os: ubuntu-latest, r: "release" } + - { os: ubuntu-latest, r: "oldrel-1" } env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: + - name: Install stable toolchain + if: ${{startsWith(matrix.config.os, 'windows')}} + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + target: x86_64-pc-windows-msvc + profile: minimal + default: true + - uses: actions/checkout@v3 - uses: r-lib/actions/setup-pandoc@v2 diff --git a/NAMESPACE b/NAMESPACE index c54016f..dfaf896 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,10 +7,8 @@ export(WebGestaltRBatch) export(WebGestaltRMultiOmics) export(WebGestaltR_batch) export(affinityPropagation) -export(fill_input_data_frame) export(formatCheck) export(goSlimSummary) -export(gsea_rust) export(idMapping) export(idToSymbol) export(listArchiveURL) @@ -21,7 +19,6 @@ export(listIdType) export(listOrganism) export(listReferenceSet) export(loadGeneSet) -export(ora_rust) export(prepareGseaInput) export(prepareInputMatrixGsea) export(readGmt) diff --git a/R/WebGestaltR.R b/R/WebGestaltR.R index 9f2b924..6a360cc 100644 --- a/R/WebGestaltR.R +++ b/R/WebGestaltR.R @@ -129,6 +129,10 @@ #' @param nThreads The number of cores to use for GSEA and set cover, and in batch function. #' @param cache A directory to save data cache for reuse. Defaults to \code{NULL} and disabled. #' @param hostName The server URL for accessing data. Mostly for development purposes. +#' @param useWeightedSetCover Use weighted set cover for ORA. Defaults to \code{TRUE}. +#' @param useAffinityPropagation Use affinity propagation for ORA. Defaults to \code{FALSE}. +#' @param usekMedoid Use k-medoid for ORA. Defaults to \code{TRUE}. +#' @param kMedoid_k The number of clusters for k-medoid. Defaults to \code{25}. #' @param ... In batch function, passes parameters to WebGestaltR function. #' Also handles backward compatibility for some parameters in old versions. #' @@ -169,78 +173,86 @@ #' @examples #' \dontrun{ #' ####### ORA example ######### -#' geneFile <- system.file("extdata", "interestingGenes.txt", package="WebGestaltR") -#' refFile <- system.file("extdata", "referenceGenes.txt", package="WebGestaltR") +#' geneFile <- system.file("extdata", "interestingGenes.txt", package = "WebGestaltR") +#' refFile <- system.file("extdata", "referenceGenes.txt", package = "WebGestaltR") #' outputDirectory <- getwd() -#' enrichResult <- WebGestaltR(enrichMethod="ORA", organism="hsapiens", -#' enrichDatabase="pathway_KEGG", interestGeneFile=geneFile, -#' interestGeneType="genesymbol", referenceGeneFile=refFile, -#' referenceGeneType="genesymbol", isOutput=TRUE, -#' outputDirectory=outputDirectory, projectName=NULL) +#' enrichResult <- WebGestaltR( +#' enrichMethod = "ORA", organism = "hsapiens", +#' enrichDatabase = "pathway_KEGG", interestGeneFile = geneFile, +#' interestGeneType = "genesymbol", referenceGeneFile = refFile, +#' referenceGeneType = "genesymbol", isOutput = TRUE, +#' outputDirectory = outputDirectory, projectName = NULL +#' ) #' #' ####### GSEA example ######### -#' rankFile <- system.file("extdata", "GeneRankList.rnk", package="WebGestaltR") +#' rankFile <- system.file("extdata", "GeneRankList.rnk", package = "WebGestaltR") #' outputDirectory <- getwd() -#' enrichResult <- WebGestaltR(enrichMethod="GSEA", organism="hsapiens", -#' enrichDatabase="pathway_KEGG", interestGeneFile=rankFile, -#' interestGeneType="genesymbol", sigMethod="top", topThr=10, minNum=5, -#' outputDirectory=outputDirectory) +#' enrichResult <- WebGestaltR( +#' enrichMethod = "GSEA", organism = "hsapiens", +#' enrichDatabase = "pathway_KEGG", interestGeneFile = rankFile, +#' interestGeneType = "genesymbol", sigMethod = "top", topThr = 10, minNum = 5, +#' outputDirectory = outputDirectory +#' ) #' #' ####### NTA example ######### -#' enrichResult <- WebGestaltR(enrichMethod="NTA", organism="hsapiens", -#' enrichDatabase="network_PPI_BIOGRID", interestGeneFile=geneFile, -#' interestGeneType="genesymbol", sigMethod="top", topThr=10, -#' outputDirectory=getwd(), highlightSeedNum=10, -#' networkConstructionMethod="Network_Retrieval_Prioritization") +#' enrichResult <- WebGestaltR( +#' enrichMethod = "NTA", organism = "hsapiens", +#' enrichDatabase = "network_PPI_BIOGRID", interestGeneFile = geneFile, +#' interestGeneType = "genesymbol", sigMethod = "top", topThr = 10, +#' outputDirectory = getwd(), highlightSeedNum = 10, +#' networkConstructionMethod = "Network_Retrieval_Prioritization" +#' ) #' } #' -WebGestaltR <- function(omic_type = "single", enrichMethod="ORA", organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, interestGeneFile=NULL, interestGene=NULL, interestGeneType=NULL, interestGeneNames=NULL, collapseMethod="mean", referenceGeneFile=NULL, referenceGene=NULL, referenceGeneType=NULL, referenceSet=NULL, minNum=10, maxNum=500, sigMethod="fdr", fdrMethod="BH", fdrThr=0.05, topThr=10, reportNum=20, perNum=1000, gseaP=1, isOutput=TRUE, outputDirectory=getwd(), projectName=NULL, dagColor="continuous", saveRawGseaResult=FALSE, gseaPlotFormat=c("png", "svg"), setCoverNum=10, networkConstructionMethod=NULL, neighborNum=10, highlightType="Seeds", highlightSeedNum=10, nThreads=1, cache=NULL, hostName="https://www.webgestalt.org/", useWeightedSetCover = FALSE, useAffinityPropagation = FALSE, usekMedoid = TRUE, kMedoid_k = 25, ...) { - extraArgs <- list(...) - if ('keepGSEAFolder' %in% names(extraArgs) | 'keepGseaFolder' %in% names(extraArgs)) { - warning("Parameter keepGSEAFolder is obsolete.\n") - } - if ('is.output' %in% names(extraArgs)) { - isOutput <- extraArgs$is.output - warning("Parameter is.output is deprecated and changed to isOutput!\n") - warning("Column names of the result data frame are modified.") - } - if ('methodType' %in% names(extraArgs)) { - warning("Parameter methodType is obsolete.\n") - } - if ('lNum' %in% names(extraArgs)) { - warning("Parameter lNum is obsolete.\n") - } - if ('dNum' %in% names(extraArgs)) { - warning("Parameter dNum is deprecated and changed to reportNum.\n") - reportNum <- extraArgs$dNum - } +WebGestaltR <- function(enrichMethod = "ORA", organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, + enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, + interestGeneNames = NULL, collapseMethod = "mean", referenceGeneFile = NULL, referenceGene = NULL, referenceGeneType = NULL, + referenceSet = NULL, minNum = 10, maxNum = 500, sigMethod = "fdr", fdrMethod = "BH", fdrThr = 0.05, topThr = 10, reportNum = 20, + perNum = 1000, gseaP = 1, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "continuous", + saveRawGseaResult = FALSE, gseaPlotFormat = c("png", "svg"), setCoverNum = 10, networkConstructionMethod = NULL, + neighborNum = 10, highlightType = "Seeds", highlightSeedNum = 10, nThreads = 1, cache = NULL, + hostName = "https://www.webgestalt.org/", useWeightedSetCover = FALSE, useAffinityPropagation = FALSE, + usekMedoid = TRUE, kMedoid_k = 25, ...) { + extraArgs <- list(...) + if ("keepGSEAFolder" %in% names(extraArgs) | "keepGseaFolder" %in% names(extraArgs)) { + warning("Parameter keepGSEAFolder is obsolete.\n") + } + if ("is.output" %in% names(extraArgs)) { + isOutput <- extraArgs$is.output + warning("Parameter is.output is deprecated and changed to isOutput!\n") + warning("Column names of the result data frame are modified.") + } + if ("methodType" %in% names(extraArgs)) { + warning("Parameter methodType is obsolete.\n") + } + if ("lNum" %in% names(extraArgs)) { + warning("Parameter lNum is obsolete.\n") + } + if ("dNum" %in% names(extraArgs)) { + warning("Parameter dNum is deprecated and changed to reportNum.\n") + reportNum <- extraArgs$dNum + } - if (!is.null(cache)) { - cat("Use cache data if available.\n") - } + if (!is.null(cache)) { + cat("Use cache data if available.\n") + } - ## TODO: add para test for NTA - errorTest <- parameterErrorMessage(enrichMethod=enrichMethod, organism=organism, collapseMethod=collapseMethod, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, perNum=perNum, isOutput=isOutput, outputDirectory=outputDirectory, dagColor=dagColor, hostName=hostName, cache=cache) - if(!is.null(errorTest)){ - return(errorTest) - } + ## TODO: add para test for NTA + errorTest <- parameterErrorMessage(enrichMethod = enrichMethod, organism = organism, collapseMethod = collapseMethod, minNum = minNum, maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr, reportNum = reportNum, perNum = perNum, isOutput = isOutput, outputDirectory = outputDirectory, dagColor = dagColor, hostName = hostName, cache = cache) + if (!is.null(errorTest)) { + return(errorTest) + } - if(is.null(projectName)){ - projectName <- as.character(as.integer(Sys.time())) - } - projectName <- sanitizeFileName(projectName) # use for GOSlim summary file name, convert punct to _ - if (omic_type == "single"){ - if (enrichMethod == "ORA") { - enrichR <- WebGestaltROra(organism=organism, enrichDatabase=enrichDatabase, enrichDatabaseFile=enrichDatabaseFile, enrichDatabaseType=enrichDatabaseType, enrichDatabaseDescriptionFile=enrichDatabaseDescriptionFile, interestGeneFile=interestGeneFile, interestGene=interestGene, interestGeneType=interestGeneType, collapseMethod=collapseMethod, referenceGeneFile=referenceGeneFile, referenceGene=referenceGene, referenceGeneType=referenceGeneType, referenceSet=referenceSet, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, setCoverNum=setCoverNum, isOutput=isOutput, outputDirectory=outputDirectory, projectName=projectName, dagColor=dagColor, nThreads=nThreads, cache=cache, hostName=hostName, useWeightedSetCover = useWeightedSetCover, useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid, kMedoid_k = kMedoid_k) - } else if (enrichMethod == "GSEA") { - enrichR <- WebGestaltRGsea(organism=organism, enrichDatabase=enrichDatabase, enrichDatabaseFile=enrichDatabaseFile, enrichDatabaseType=enrichDatabaseType, enrichDatabaseDescriptionFile=enrichDatabaseDescriptionFile, interestGeneFile=interestGeneFile, interestGene=interestGene, interestGeneType=interestGeneType, collapseMethod=collapseMethod, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, setCoverNum=setCoverNum, perNum=perNum, p=gseaP, isOutput=isOutput, outputDirectory=outputDirectory, projectName=projectName, dagColor=dagColor, saveRawGseaResult=saveRawGseaResult, plotFormat=gseaPlotFormat, nThreads=nThreads, cache=cache, hostName=hostName, useWeightedSetCover = useWeightedSetCover, useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid, kMedoid_k = kMedoid_k) - } else if (enrichMethod == "NTA") { - enrichR <- WebGestaltRNta(organism=organism, network=enrichDatabase, method=networkConstructionMethod, neighborNum=neighborNum, highlightSeedNum=highlightSeedNum, inputSeed=interestGene, inputSeedFile=interestGeneFile, interestGeneType=interestGeneType, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, outputDirectory=outputDirectory, projectName=projectName, highlightType=highlightType, cache=cache, hostName=hostName) - } - } - else if (omic_type == "multi"){ - enrichR <- WebGestaltRMultiOmics(enrichMethod = enrichMethod, organism=organism, enrichDatabase=enrichDatabase, enrichDatabaseFile=enrichDatabaseFile, enrichDatabaseType=enrichDatabaseType, enrichDatabaseDescriptionFile=enrichDatabaseDescriptionFile, interestGeneFiles=interestGeneFile, interestGenes =interestGene, interestGeneType=interestGeneType, interestGeneNames = interestGeneNames, collapseMethod=collapseMethod, referenceGeneFile=referenceGeneFile, referenceGene=referenceGene, referenceGeneType=referenceGeneType, referenceSet=referenceSet, minNum=minNum, maxNum=maxNum, fdrMethod=fdrMethod, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, reportNum=reportNum, setCoverNum=setCoverNum, isOutput=isOutput, outputDirectory=outputDirectory, projectName=projectName, dagColor=dagColor, nThreads=nThreads, cache=cache, hostName=hostName, useWeightedSetCover = useWeightedSetCover, useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid, kMedoid_k = kMedoid_k) - } - - return(enrichR) + if (is.null(projectName)) { + projectName <- as.character(as.integer(Sys.time())) + } + projectName <- sanitizeFileName(projectName) # use for GOSlim summary file name, convert punct to _ + if (enrichMethod == "ORA") { + enrichR <- WebGestaltROra(organism = organism, enrichDatabase = enrichDatabase, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, interestGeneFile = interestGeneFile, interestGene = interestGene, interestGeneType = interestGeneType, collapseMethod = collapseMethod, referenceGeneFile = referenceGeneFile, referenceGene = referenceGene, referenceGeneType = referenceGeneType, referenceSet = referenceSet, minNum = minNum, maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr, reportNum = reportNum, setCoverNum = setCoverNum, isOutput = isOutput, outputDirectory = outputDirectory, projectName = projectName, dagColor = dagColor, nThreads = nThreads, cache = cache, hostName = hostName, useWeightedSetCover = useWeightedSetCover, useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid, kMedoid_k = kMedoid_k) + } else if (enrichMethod == "GSEA") { + enrichR <- WebGestaltRGsea(organism = organism, enrichDatabase = enrichDatabase, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, interestGeneFile = interestGeneFile, interestGene = interestGene, interestGeneType = interestGeneType, collapseMethod = collapseMethod, minNum = minNum, maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr, reportNum = reportNum, setCoverNum = setCoverNum, perNum = perNum, p = gseaP, isOutput = isOutput, outputDirectory = outputDirectory, projectName = projectName, dagColor = dagColor, saveRawGseaResult = saveRawGseaResult, plotFormat = gseaPlotFormat, nThreads = nThreads, cache = cache, hostName = hostName, useWeightedSetCover = useWeightedSetCover, useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid, kMedoid_k = kMedoid_k) + } else if (enrichMethod == "NTA") { + enrichR <- WebGestaltRNta(organism = organism, network = enrichDatabase, method = networkConstructionMethod, neighborNum = neighborNum, highlightSeedNum = highlightSeedNum, inputSeed = interestGene, inputSeedFile = interestGeneFile, interestGeneType = interestGeneType, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr, outputDirectory = outputDirectory, projectName = projectName, highlightType = highlightType, cache = cache, hostName = hostName) + } + return(enrichR) } diff --git a/R/WebGestaltRMultiOmics.R b/R/WebGestaltRMultiOmics.R index 51fac47..ce2f1bb 100644 --- a/R/WebGestaltRMultiOmics.R +++ b/R/WebGestaltRMultiOmics.R @@ -47,7 +47,7 @@ #' @param referenceLists For the ORA method, users can also use an R object as the reference #' gene list. \code{referenceLists} should be an R \code{vector} object containing the #' reference gene list. -#' @param referenceGeneType The ID type of the reference gene list. The supported ID types +#' @param referenceTypes Vector of the ID types of the reference lists. The supported ID types #' of WebGestaltR for the selected organism can be found by the function \code{listIdType}. #' If the \code{organism} is \code{others}, users do not need to set this parameter. #' @param minNum WebGestaltR will exclude the categories with the number of annotated genes @@ -92,30 +92,19 @@ #' \code{png}, or \code{c("png", "svg")} (default). #' @param setCoverNum The number of expected gene sets after set cover to reduce redundancy. #' It could get fewer sets if the coverage reaches 100\%. The default is \code{10}. -#' @param networkConstructionMethod Netowrk construction method for NTA. Either -#' \code{Network_Retrieval_Prioritization} or \code{Network_Expansion}. Network Retrieval & -#' Prioritization first uses random walk analysis to calculate random walk probabilities -#' for the input seeds, then identifies the relationships among the seeds in the selected -#' network and returns a retrieval sub-network. The seeds with the top random walk -#' probabilities are highlighted in the sub-network. Network Expansion first uses random -#' walk analysis to rank all genes in the selected network based on their network -#' proximity to the input seeds and then return an expanded sub-network in which nodes -#' are the input seeds and their top ranking neighbors and edges represent their -#' relationships. -#' @param neighborNum The number of neighbors to include in NTA Network Expansion method. -#' @param highlightType The type of nodes to highlight in the NTA Network Expansion method, -#' either \code{Seeds} or \code{Neighbors}. -#' @param highlightSeedNum The number of top input seeds to highlight in NTA Network Retrieval -#' & Prioritizaiton method. #' @param nThreads The number of cores to use for GSEA and set cover, and in batch function. #' @param cache A directory to save data cache for reuse. Defaults to \code{NULL} and disabled. #' @param hostName The server URL for accessing data. Mostly for development purposes. +#' @param useWeightedSetCover Use weighted set cover for ORA. Defaults to \code{TRUE}. +#' @param useAffinityPropagation Use affinity propagation for ORA. Defaults to \code{FALSE}. +#' @param usekMedoid Use k-medoid for ORA. Defaults to \code{TRUE}. +#' @param kMedoid_k The number of clusters for k-medoid. Defaults to \code{25}. #' @export WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, analyteTypes = NULL, enrichMethod = "ORA", organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, collapseMethod = "mean", minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, - topThr = 10, reportNum = 20, setCoverNum = 10, perNum = 1000, p = 1, isOutput = TRUE, outputDirectory = getwd(), - projectName = NULL, dagColor = "binary", saveRawGseaResult = FALSE, plotFormat = "png", nThreads = 1, cache = NULL, + topThr = 10, reportNum = 20, setCoverNum = 10, perNum = 1000, gseaP = 1, isOutput = TRUE, outputDirectory = getwd(), + projectName = NULL, dagColor = "binary", saveRawGseaResult = FALSE, gseaPlotFormat = "png", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10, isMetaAnalysis = TRUE, mergeMethod = "mean", normalizationMethod = "rank", referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL, referenceSet) { diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 5ae5a1e..f3ae32f 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -35,19 +35,43 @@ NULL #' @author John Elizarraras #' @name fill_input_data_frame #' @keywords internal -#' @export fill_input_data_frame <- function(gmt, genes, gene_sets) .Call(wrap__fill_input_data_frame, gmt, genes, gene_sets) #' Run GSEA using rust library +#' @param min_overlap the minimum overlap between analyte set and analyte list +#' @param max_overlap the maximum overlap between analyte set and analyte list +#' @param permutations the number of permutations to run +#' @param sets A vector of analyte set names +#' @param parts A list of the analytse in the analyte sets +#' @param analytes A vector of analytes names in the GSEA list +#' @param ranks A vector of ranks for the analytes in the GSEA list #' @return List of the results of GSEA +#' @author John Elizarraras +#' @keywords internal #' @name gsea_rust -#' @export gsea_rust <- function(min_overlap, max_overlap, permutations, sets, parts, analytes, ranks) .Call(wrap__gsea_rust, min_overlap, max_overlap, permutations, sets, parts, analytes, ranks) #' Run ORA using Rust library #' @name ora_rust -#' @export +#' @param sets A vector of analyte set names +#' @param parts A list of the analyte in the analyte sets +#' @param interest A vector of analytes of interest +#' @param reference A vector of analytes in the reference set +#' @returns A list of the results of ORA +#' @author John Elizarraras +#' @keywords internal ora_rust <- function(sets, parts, interest, reference) .Call(wrap__ora_rust, sets, parts, interest, reference) +#' Run multiomics ORA using Rust library +#' @param sets list of the names of the analyte sets +#' @param parts list of the analyte in the analyte sets +#' @param interest list of analytes of interest +#' @param reference list of analytes in the reference set +#' @param method meta-analysis method to get meta-p values +#' @returns A list of vectors containing the results of ORA, with each list corresponding to each input list +#' @author John Elizarraras +#' @keywords internal +rust_multiomics_ora <- function(sets, parts, interest, reference, method) .Call(wrap__rust_multiomics_ora, sets, parts, interest, reference, method) + # nolint end diff --git a/man/WebGestaltR.Rd b/man/WebGestaltR.Rd index 9e991d4..c4734a3 100644 --- a/man/WebGestaltR.Rd +++ b/man/WebGestaltR.Rd @@ -9,7 +9,6 @@ \title{WebGestaltR: The R interface for enrichment analysis with WebGestalt.} \usage{ WebGestaltR( - omic_type = "single", enrichMethod = "ORA", organism = "hsapiens", enrichDatabase = NULL, @@ -218,6 +217,14 @@ either \code{Seeds} or \code{Neighbors}.} \item{hostName}{The server URL for accessing data. Mostly for development purposes.} +\item{useWeightedSetCover}{Use weighted set cover for ORA. Defaults to \code{TRUE}.} + +\item{useAffinityPropagation}{Use affinity propagation for ORA. Defaults to \code{FALSE}.} + +\item{usekMedoid}{Use k-medoid for ORA. Defaults to \code{TRUE}.} + +\item{kMedoid_k}{The number of clusters for k-medoid. Defaults to \code{25}.} + \item{...}{In batch function, passes parameters to WebGestaltR function. Also handles backward compatibility for some parameters in old versions.} @@ -278,29 +285,35 @@ report. \examples{ \dontrun{ ####### ORA example ######### -geneFile <- system.file("extdata", "interestingGenes.txt", package="WebGestaltR") -refFile <- system.file("extdata", "referenceGenes.txt", package="WebGestaltR") +geneFile <- system.file("extdata", "interestingGenes.txt", package = "WebGestaltR") +refFile <- system.file("extdata", "referenceGenes.txt", package = "WebGestaltR") outputDirectory <- getwd() -enrichResult <- WebGestaltR(enrichMethod="ORA", organism="hsapiens", - enrichDatabase="pathway_KEGG", interestGeneFile=geneFile, - interestGeneType="genesymbol", referenceGeneFile=refFile, - referenceGeneType="genesymbol", isOutput=TRUE, - outputDirectory=outputDirectory, projectName=NULL) +enrichResult <- WebGestaltR( + enrichMethod = "ORA", organism = "hsapiens", + enrichDatabase = "pathway_KEGG", interestGeneFile = geneFile, + interestGeneType = "genesymbol", referenceGeneFile = refFile, + referenceGeneType = "genesymbol", isOutput = TRUE, + outputDirectory = outputDirectory, projectName = NULL +) ####### GSEA example ######### -rankFile <- system.file("extdata", "GeneRankList.rnk", package="WebGestaltR") +rankFile <- system.file("extdata", "GeneRankList.rnk", package = "WebGestaltR") outputDirectory <- getwd() -enrichResult <- WebGestaltR(enrichMethod="GSEA", organism="hsapiens", - enrichDatabase="pathway_KEGG", interestGeneFile=rankFile, - interestGeneType="genesymbol", sigMethod="top", topThr=10, minNum=5, - outputDirectory=outputDirectory) +enrichResult <- WebGestaltR( + enrichMethod = "GSEA", organism = "hsapiens", + enrichDatabase = "pathway_KEGG", interestGeneFile = rankFile, + interestGeneType = "genesymbol", sigMethod = "top", topThr = 10, minNum = 5, + outputDirectory = outputDirectory +) ####### NTA example ######### -enrichResult <- WebGestaltR(enrichMethod="NTA", organism="hsapiens", - enrichDatabase="network_PPI_BIOGRID", interestGeneFile=geneFile, - interestGeneType="genesymbol", sigMethod="top", topThr=10, - outputDirectory=getwd(), highlightSeedNum=10, - networkConstructionMethod="Network_Retrieval_Prioritization") +enrichResult <- WebGestaltR( + enrichMethod = "NTA", organism = "hsapiens", + enrichDatabase = "network_PPI_BIOGRID", interestGeneFile = geneFile, + interestGeneType = "genesymbol", sigMethod = "top", topThr = 10, + outputDirectory = getwd(), highlightSeedNum = 10, + networkConstructionMethod = "Network_Retrieval_Prioritization" +) } } diff --git a/man/WebGestaltRMultiOmics.Rd b/man/WebGestaltRMultiOmics.Rd index a2f8652..d25a179 100644 --- a/man/WebGestaltRMultiOmics.Rd +++ b/man/WebGestaltRMultiOmics.Rd @@ -24,13 +24,13 @@ WebGestaltRMultiOmics( reportNum = 20, setCoverNum = 10, perNum = 1000, - p = 1, + gseaP = 1, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "binary", saveRawGseaResult = FALSE, - plotFormat = "png", + gseaPlotFormat = "png", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", @@ -120,6 +120,9 @@ It could get fewer sets if the coverage reaches 100\%. The default is \code{10}. \item{perNum}{The number of permutations for the GSEA method. The default is \code{1000}.} +\item{gseaP}{The exponential scaling factor of the phenotype score. The default is \code{1}. +When p=0, ES reduces to standard K-S statistics (See original paper for more details).} + \item{isOutput}{If \code{isOutput} is TRUE, WebGestaltR will create a folder named by the \code{projectName} and save the results in the folder. Otherwise, WebGestaltR will only return an R \code{data.frame} object containing the enrichment results. If @@ -145,12 +148,23 @@ corresponding FDRs.} \item{Items_in_Set}{A list with ranks of genes for each gene set} }} +\item{gseaPlotFormat}{The graphic format of GSEA enrichment plots. Either \code{svg}, +\code{png}, or \code{c("png", "svg")} (default).} + \item{nThreads}{The number of cores to use for GSEA and set cover, and in batch function.} \item{cache}{A directory to save data cache for reuse. Defaults to \code{NULL} and disabled.} \item{hostName}{The server URL for accessing data. Mostly for development purposes.} +\item{useWeightedSetCover}{Use weighted set cover for ORA. Defaults to \code{TRUE}.} + +\item{useAffinityPropagation}{Use affinity propagation for ORA. Defaults to \code{FALSE}.} + +\item{usekMedoid}{Use k-medoid for ORA. Defaults to \code{TRUE}.} + +\item{kMedoid_k}{The number of clusters for k-medoid. Defaults to \code{25}.} + \item{referenceLists}{For the ORA method, users can also use an R object as the reference gene list. \code{referenceLists} should be an R \code{vector} object containing the reference gene list.} @@ -159,34 +173,9 @@ reference gene list.} list. The extension of the \code{referenceListFile} should be \code{txt} and the file can only contain one column: the reference gene list.} -\item{referenceGeneType}{The ID type of the reference gene list. The supported ID types +\item{referenceTypes}{Vector of the ID types of the reference lists. The supported ID types of WebGestaltR for the selected organism can be found by the function \code{listIdType}. If the \code{organism} is \code{others}, users do not need to set this parameter.} - -\item{gseaP}{The exponential scaling factor of the phenotype score. The default is \code{1}. -When p=0, ES reduces to standard K-S statistics (See original paper for more details).} - -\item{gseaPlotFormat}{The graphic format of GSEA enrichment plots. Either \code{svg}, -\code{png}, or \code{c("png", "svg")} (default).} - -\item{networkConstructionMethod}{Netowrk construction method for NTA. Either -\code{Network_Retrieval_Prioritization} or \code{Network_Expansion}. Network Retrieval & -Prioritization first uses random walk analysis to calculate random walk probabilities -for the input seeds, then identifies the relationships among the seeds in the selected -network and returns a retrieval sub-network. The seeds with the top random walk -probabilities are highlighted in the sub-network. Network Expansion first uses random -walk analysis to rank all genes in the selected network based on their network -proximity to the input seeds and then return an expanded sub-network in which nodes -are the input seeds and their top ranking neighbors and edges represent their -relationships.} - -\item{neighborNum}{The number of neighbors to include in NTA Network Expansion method.} - -\item{highlightType}{The type of nodes to highlight in the NTA Network Expansion method, -either \code{Seeds} or \code{Neighbors}.} - -\item{highlightSeedNum}{The number of top input seeds to highlight in NTA Network Retrieval -& Prioritizaiton method.} } \description{ Perform multi-omics analysis using WebGestaltR diff --git a/man/gsea_rust.Rd b/man/gsea_rust.Rd index 0bed739..a1e317e 100644 --- a/man/gsea_rust.Rd +++ b/man/gsea_rust.Rd @@ -6,9 +6,28 @@ \usage{ gsea_rust(min_overlap, max_overlap, permutations, sets, parts, analytes, ranks) } +\arguments{ +\item{min_overlap}{the minimum overlap between analyte set and analyte list} + +\item{max_overlap}{the maximum overlap between analyte set and analyte list} + +\item{permutations}{the number of permutations to run} + +\item{sets}{A vector of analyte set names} + +\item{parts}{A list of the analytse in the analyte sets} + +\item{analytes}{A vector of analytes names in the GSEA list} + +\item{ranks}{A vector of ranks for the analytes in the GSEA list} +} \value{ List of the results of GSEA } \description{ Run GSEA using rust library } +\author{ +John Elizarraras +} +\keyword{internal} diff --git a/man/ora_rust.Rd b/man/ora_rust.Rd index 4c5ed95..c9284fd 100644 --- a/man/ora_rust.Rd +++ b/man/ora_rust.Rd @@ -6,6 +6,22 @@ \usage{ ora_rust(sets, parts, interest, reference) } +\arguments{ +\item{sets}{A vector of analyte set names} + +\item{parts}{A list of the analyte in the analyte sets} + +\item{interest}{A vector of analytes of interest} + +\item{reference}{A vector of analytes in the reference set} +} +\value{ +A list of the results of ORA +} \description{ Run ORA using Rust library } +\author{ +John Elizarraras +} +\keyword{internal} diff --git a/man/rust_multiomics_ora.Rd b/man/rust_multiomics_ora.Rd new file mode 100644 index 0000000..244861d --- /dev/null +++ b/man/rust_multiomics_ora.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extendr-wrappers.R +\name{rust_multiomics_ora} +\alias{rust_multiomics_ora} +\title{Run multiomics ORA using Rust library} +\usage{ +rust_multiomics_ora(sets, parts, interest, reference, method) +} +\arguments{ +\item{sets}{list of the names of the analyte sets} + +\item{parts}{list of the analyte in the analyte sets} + +\item{interest}{list of analytes of interest} + +\item{reference}{list of analytes in the reference set} + +\item{method}{meta-analysis method to get meta-p values} +} +\value{ +A list of vectors containing the results of ORA, with each list corresponding to each input list +} +\description{ +Run multiomics ORA using Rust library +} +\author{ +John Elizarraras +} +\keyword{internal} diff --git a/src/Makevars.win b/src/Makevars.win index 0e327bb..717c761 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,10 +1,9 @@ TARGET = $(subst 64,x86_64,$(subst 32,i686,$(WIN)))-pc-windows-gnu -CRATE = WebGestaltR -BASE_TAG = windowsv0.0.1e + TARGET_DIR = ./rust/target LIBDIR = $(TARGET_DIR)/$(TARGET)/release -STATLIB = $(LIBDIR)/$(CRATE).lib -PKG_LIBS = -L$(LIBDIR) -l$(CRATE) -lws2_32 -ladvapi32 -luserenv -lbcrypt -lntdll +STATLIB = $(LIBDIR)/libWebGestaltR.a +PKG_LIBS = -L$(LIBDIR) -lWebGestaltR -lws2_32 -ladvapi32 -luserenv -lbcrypt -lntdll all: C_clean @@ -12,29 +11,27 @@ $(SHLIB): $(STATLIB) CARGOTMP = $(CURDIR)/.cargo -# See https://yutani.rbind.io/post/some-more-notes-about-using-rust-code-in-r-packages/#precompiled-binary-for-windows $(STATLIB): mkdir -p $(TARGET_DIR)/libgcc_mock - curl -L -o $(STATLIB) https://github.com/iblacksand/$(CRATE)/releases/download/$(BASE_TAG)-$(TARGET)/$(CRATE).lib # `rustc` adds `-lgcc_eh` flags to the compiler, but Rtools' GCC doesn't have # `libgcc_eh` due to the compilation settings. So, in order to please the # compiler, we need to add empty `libgcc_eh` to the library search paths. # # For more details, please refer to # https://github.com/r-windows/rtools-packages/blob/2407b23f1e0925bbb20a4162c963600105236318/mingw-w64-gcc/PKGBUILD#L313-L316 - # touch $(TARGET_DIR)/libgcc_mock/libgcc_eh.a + touch $(TARGET_DIR)/libgcc_mock/libgcc_eh.a # CARGO_LINKER is provided in Makevars.ucrt for R >= 4.2 - # if [ "$(NOT_CRAN)" != "true" ]; then \ - # export CARGO_HOME=$(CARGOTMP); \ - # fi && \ - # export CARGO_TARGET_X86_64_PC_WINDOWS_GNU_LINKER="$(CARGO_LINKER)" && \ - # export LIBRARY_PATH="$${LIBRARY_PATH};$(CURDIR)/$(TARGET_DIR)/libgcc_mock" && \ - # cargo build --target=$(TARGET) --lib --release --manifest-path=./rust/Cargo.toml --target-dir $(TARGET_DIR) - # if [ "$(NOT_CRAN)" != "true" ]; then \ - # rm -Rf $(CARGOTMP) && \ - # rm -Rf $(LIBDIR)/build; \ - # fi + if [ "$(NOT_CRAN)" != "true" ]; then \ + export CARGO_HOME=$(CARGOTMP); \ + fi && \ + export CARGO_TARGET_X86_64_PC_WINDOWS_GNU_LINKER="$(CARGO_LINKER)" && \ + export LIBRARY_PATH="$${LIBRARY_PATH};$(CURDIR)/$(TARGET_DIR)/libgcc_mock" && \ + cargo build --target=$(TARGET) --lib --release --manifest-path=./rust/Cargo.toml --target-dir $(TARGET_DIR) + if [ "$(NOT_CRAN)" != "true" ]; then \ + rm -Rf $(CARGOTMP) && \ + rm -Rf $(LIBDIR)/build; \ + fi C_clean: rm -Rf $(SHLIB) $(STATLIB) $(OBJECTS) diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 79ca08e..52d42ae 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -65,6 +65,13 @@ pub fn fill_input_data_frame(gmt: Robj, genes: Robj, gene_sets: Robj) -> List { /// Run ORA using Rust library /// @name ora_rust +/// @param sets A vector of analyte set names +/// @param parts A list of the analyte in the analyte sets +/// @param interest A vector of analytes of interest +/// @param reference A vector of analytes in the reference set +/// @returns A list of the results of ORA +/// @author John Elizarraras +/// @keywords internal #[extendr] fn ora_rust(sets: Robj, parts: Robj, interest: Robj, reference: Robj) -> List { let config: ORAConfig = ORAConfig { @@ -114,6 +121,15 @@ fn ora_rust(sets: Robj, parts: Robj, interest: Robj, reference: Robj) -> List { ) } +/// Run multiomics ORA using Rust library +/// @param sets list of the names of the analyte sets +/// @param parts list of the analyte in the analyte sets +/// @param interest list of analytes of interest +/// @param reference list of analytes in the reference set +/// @param method meta-analysis method to get meta-p values +/// @returns A list of vectors containing the results of ORA, with each list corresponding to each input list +/// @author John Elizarraras +/// @keywords internal #[extendr] pub fn rust_multiomics_ora( sets: Robj, @@ -194,9 +210,17 @@ pub fn rust_multiomics_ora( } /// Run GSEA using rust library +/// @param min_overlap the minimum overlap between analyte set and analyte list +/// @param max_overlap the maximum overlap between analyte set and analyte list +/// @param permutations the number of permutations to run +/// @param sets A vector of analyte set names +/// @param parts A list of the analytse in the analyte sets +/// @param analytes A vector of analytes names in the GSEA list +/// @param ranks A vector of ranks for the analytes in the GSEA list /// @return List of the results of GSEA +/// @author John Elizarraras +/// @keywords internal /// @name gsea_rust -/// @export #[extendr] fn gsea_rust( min_overlap: Robj, @@ -266,6 +290,21 @@ fn gsea_rust( ) } +/// Run multiomics GSEA using rust library +/// @param min_overlap the minimum overlap between analyte set and analyte list +/// @param max_overlap the maximum overlap between analyte set and analyte list +/// @param permutations the number of permutations to run +/// @param sets A vector of analyte set names +/// @param parts A list of the analytse in the analyte sets +/// @param analytes A vector of analytes names in the GSEA list +/// @param ranks A vector of ranks for the analytes in the GSEA list +/// @param method_modifier method modifier for the multiomics method ("fisher" or "stouffer" if +/// meta-analysis, "mean", "max", or "rank" if other combination method) +/// @param combo_method method for combining analyte sets (meta, mean, or max) +/// @return List of lists of the results of GSEA +/// @author John Elizarraras +/// @keywords internal +/// @name gsea_rust #[extendr] pub fn rust_multiomics_gsea( min_overlap: Robj, From 063a17d8a0b22c82cd00222cd195ac567252e74a Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 9 Nov 2023 13:44:28 -0600 Subject: [PATCH 70/82] fix documentation --- R/WebGestaltRMultiOmics.R | 11 ++++++++--- R/multiOraEnrichment.R | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/R/WebGestaltRMultiOmics.R b/R/WebGestaltRMultiOmics.R index ce2f1bb..e7f09f6 100644 --- a/R/WebGestaltRMultiOmics.R +++ b/R/WebGestaltRMultiOmics.R @@ -34,6 +34,7 @@ #' containing the interesting analyte lists. If \code{enrichMethod} is \code{GSEA}, #' \code{analyteLists} should be an \code{vector} of R \code{data.frame} objects containing two columns: the #' gene list and the corresponding scores. +#' @param analyteTypes a vector containing the ID types of the analyte lists. #' @param analyteLists \code{vector} of the ID type of the corresponding interesting analyte list. The supported ID types of #' WebGestaltR for the selected organism can be found by the function \code{listIdType}. If #' the \code{organism} is \code{others}, users do not need to set this parameter. The length of \code{analyteLists} should be @@ -98,16 +99,20 @@ #' @param useWeightedSetCover Use weighted set cover for ORA. Defaults to \code{TRUE}. #' @param useAffinityPropagation Use affinity propagation for ORA. Defaults to \code{FALSE}. #' @param usekMedoid Use k-medoid for ORA. Defaults to \code{TRUE}. +#' @param isMetaAnalysis whether to perform meta-analysis. Defaults to \code{TRUE}. +#' @param mergeMethod The method to merge the results from multiple omics (options: \code{mean}, \code{max}). Only used if \code{isMetaAnalysis = FALSE}. Defaults to \code{mean}. +#' @param normalizationMethod The method to normalize the results from multiple omics (options: \code{rank}, \code{median}, \code{mean}). Only used if \code{isMetaAnalysis = FALSE}. #' @param kMedoid_k The number of clusters for k-medoid. Defaults to \code{25}. +#' #' @export WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, analyteTypes = NULL, enrichMethod = "ORA", organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, collapseMethod = "mean", minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, - topThr = 10, reportNum = 20, setCoverNum = 10, perNum = 1000, gseaP = 1, isOutput = TRUE, outputDirectory = getwd(), + topThr = 10, reportNum = 100, setCoverNum = 10, perNum = 1000, gseaP = 1, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "binary", saveRawGseaResult = FALSE, gseaPlotFormat = "png", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, - usekMedoid = FALSE, kMedoid_k = 10, isMetaAnalysis = TRUE, mergeMethod = "mean", normalizationMethod = "rank", - referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL, referenceSet) { + usekMedoid = FALSE, kMedoid_k = 25, isMetaAnalysis = TRUE, mergeMethod = "mean", normalizationMethod = "rank", + referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL) { VALID_MERGE_METHODS <- c("mean", "max") VALID_NORM_METHODS <- c("rank", "median", "mean") VALID_ENRICH_METHODS <- c("ORA", "GSEA") diff --git a/R/multiOraEnrichment.R b/R/multiOraEnrichment.R index 5d398e3..c1f9c73 100644 --- a/R/multiOraEnrichment.R +++ b/R/multiOraEnrichment.R @@ -46,5 +46,5 @@ multiOraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10 tapply(x$gene, x$geneSet, rbind) }) rust_result <- rust_multiomics_ora(geneSet, genes, interestGene, referenceGene) - disp(head(rust_result)) + print(head(rust_result)) } From 3acde494627d50c51a1150994e6c06774278d84f Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 9 Nov 2023 13:51:05 -0600 Subject: [PATCH 71/82] update Rd --- man/WebGestaltRMultiOmics.Rd | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/man/WebGestaltRMultiOmics.Rd b/man/WebGestaltRMultiOmics.Rd index d25a179..8fd7c7e 100644 --- a/man/WebGestaltRMultiOmics.Rd +++ b/man/WebGestaltRMultiOmics.Rd @@ -21,7 +21,7 @@ WebGestaltRMultiOmics( sigMethod = "fdr", fdrThr = 0.05, topThr = 10, - reportNum = 20, + reportNum = 100, setCoverNum = 10, perNum = 1000, gseaP = 1, @@ -37,14 +37,13 @@ WebGestaltRMultiOmics( useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, - kMedoid_k = 10, + kMedoid_k = 25, isMetaAnalysis = TRUE, mergeMethod = "mean", normalizationMethod = "rank", referenceLists = NULL, referenceListFiles = NULL, - referenceTypes = NULL, - referenceSet + referenceTypes = NULL ) } \arguments{ @@ -59,6 +58,8 @@ the interesting analyte list. If \code{enrichMethod} is \code{GSEA}, the extensi \code{analyteListFiles} should be \code{rnk} and the files should contain two columns separated by tab: the analyte list and the corresponding scores.} +\item{analyteTypes}{a vector containing the ID types of the analyte lists.} + \item{enrichMethod}{Enrichment methods: \code{ORA}or \code{GSEA}.} \item{organism}{Currently, WebGestaltR supports 12 organisms. Users can use the function @@ -165,6 +166,12 @@ corresponding FDRs.} \item{kMedoid_k}{The number of clusters for k-medoid. Defaults to \code{25}.} +\item{isMetaAnalysis}{whether to perform meta-analysis. Defaults to \code{TRUE}.} + +\item{mergeMethod}{The method to merge the results from multiple omics (options: \code{mean}, \code{max}). Only used if \code{isMetaAnalysis = FALSE}. Defaults to \code{mean}.} + +\item{normalizationMethod}{The method to normalize the results from multiple omics (options: \code{rank}, \code{median}, \code{mean}). Only used if \code{isMetaAnalysis = FALSE}.} + \item{referenceLists}{For the ORA method, users can also use an R object as the reference gene list. \code{referenceLists} should be an R \code{vector} object containing the reference gene list.} From 1514aaf0e71d95d2a37fc04ed83a1486729eff38 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 9 Nov 2023 14:54:26 -0600 Subject: [PATCH 72/82] fix multiple lists not being read correctly for ORA --- R/WebGestaltRMultiOmics.R | 12 +++++++----- R/loadGeneSet.R | 15 ++++++++------- README.md | 2 ++ dev_install | 0 man/loadGeneSet.Rd | 5 ++++- src/rust/Cargo.lock | 34 +++++++++++++++++----------------- 6 files changed, 38 insertions(+), 30 deletions(-) mode change 100644 => 100755 dev_install diff --git a/R/WebGestaltRMultiOmics.R b/R/WebGestaltRMultiOmics.R index e7f09f6..74fb387 100644 --- a/R/WebGestaltRMultiOmics.R +++ b/R/WebGestaltRMultiOmics.R @@ -264,7 +264,8 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, } -.load_combined_gmt <- function(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) { +.load_combined_gmt <- function(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, + analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) { databases <- c() if (!is.null(enrichDatabase)) { # Need to get correct name for metabolite databases if (length(unique(analyteTypes)) == 1) { @@ -285,19 +286,20 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, } all_sets <- loadGeneSet( organism = organism, enrichDatabase = databases, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, - enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, cache = cache, hostName = hostName + enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, cache = cache, hostName = hostName, isMultiOmics = TRUE ) return(all_sets) } -.load_meta_gmt <- function(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) { +.load_meta_gmt <- function(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, + analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) { databases <- c() if (!is.null(enrichDatabase)) { # Need to get correct name for metabolite databases if (length(unique(analyteTypes)) == 1) { databases <- enrichDatabase } else { for (i in seq_along(analyteTypes)) { - databases <- c(databases, get_gmt_file(hostName, analyteTypes[i], enrichDatabase[i], organism, cache)) + databases <- c(databases, get_gmt_file(hostName, analyteTypes[i], enrichDatabase, organism, cache)) } } } else { @@ -305,7 +307,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, } all_sets <- loadGeneSet( organism = organism, enrichDatabase = databases, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, - enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, cache = cache, hostName = hostName + enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, cache = cache, hostName = hostName, isMultiOmics = TRUE ) return(all_sets) } diff --git a/R/loadGeneSet.R b/R/loadGeneSet.R index d6d6377..01b7b4a 100644 --- a/R/loadGeneSet.R +++ b/R/loadGeneSet.R @@ -1,7 +1,7 @@ #' Load gene set data #' #' @inheritParams WebGestaltR -#' +#' @param isMultiOmics Boolean if loading gene sets for multiomics. Defaults to \code{FALSE}. #' @return A list of \code{geneSet}, \code{geneSetDes}, \code{geneSetDag}, \code{geneSetNet}, \code{standardId}. #' \describe{ #' \item{geneSet}{Gene set: A data frame with columns of "geneSet", "description", "genes"} @@ -14,7 +14,8 @@ #' @importFrom dplyr select distinct filter %>% #' @importFrom httr modify_url #' @export -loadGeneSet <- function(organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, cache=NULL, hostName="https://www.webgestalt.org/") { +loadGeneSet <- function(organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, + cache=NULL, hostName="https://www.webgestalt.org/", isMultiOmics = FALSE) { # TODO: multiple custom database ID types? geneSet <- NULL ##gene sets geneSetDes <- NULL ##gene set description file @@ -32,9 +33,9 @@ loadGeneSet <- function(organism="hsapiens", enrichDatabase=NULL, enrichDatabase if (!is.vector(enrichDatabaseDescriptionFile)) { enrichDatabaseDescriptionFile = list(enrichDatabaseDescriptionFile) } - if (length(enrichDatabaseFile) != length(enrichDatabaseDescriptionFile)) { - stop("The number of custom database and its description files should be equal. Use NULL for placeholder.") - } + if (length(enrichDatabaseFile) != length(enrichDatabaseDescriptionFile)) { + stop("The number of custom database and its description files should be equal. Use NULL for placeholder.") + } if (organism != "others") { # supported organism geneSetInfo <- listGeneSet(organism=organism, hostName=hostName, cache=cache) # load build-in databases @@ -46,7 +47,7 @@ loadGeneSet <- function(organism="hsapiens", enrichDatabase=NULL, enrichDatabase } # get the ID type of the enriched database, such as entrezgene or phosphsiteSeq thisStandardId <- filter(geneSetInfo, .data$name==enrichDb)[[1, "idType"]] - if (!is.null(standardId) && standardId != thisStandardId) { + if (!is.null(standardId) && standardId != thisStandardId && !isMultiOmics) { stop("Databases have inconsistent ID types. Mixed gene annotation databases with phosphosite databases?") } standardId <- thisStandardId @@ -93,7 +94,7 @@ loadGeneSet <- function(organism="hsapiens", enrichDatabase=NULL, enrichDatabase thisGeneSet <- idMapping(organism=organism, dataType="gmt", inputGeneFile=enrichDbFile, sourceIdType=enrichDatabaseType, targetIdType=NULL, mappingOutput=FALSE, cache=cache, hostName=hostName) thisStandardId <- thisGeneSet$standardId # should be just enrichDatabaseType here - if (!is.null(standardId) && standardId != thisStandardId) { + if (!is.null(standardId) && standardId != thisStandardId && !isMultiOmics) { stop("Databases have inconsistent ID types. Mixed gene annotation databases with phosphosite databases?") } standardId <- thisStandardId diff --git a/README.md b/README.md index 39e93c5..fd22ddb 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # WebGestaltR +[![R-CMD-check](https://github.com/iblacksand/WebGestaltR/actions/workflows/check-standard.yaml/badge.svg)](https://github.com/iblacksand/WebGestaltR/actions/workflows/check-standard.yaml) + > [!IMPORTANT] > The new version of WebGesaltR requires Rust, which must be installed on your device prior to installing or updating the package from CRAN. See the installation section for more information. diff --git a/dev_install b/dev_install old mode 100644 new mode 100755 diff --git a/man/loadGeneSet.Rd b/man/loadGeneSet.Rd index 85da494..0b6c609 100644 --- a/man/loadGeneSet.Rd +++ b/man/loadGeneSet.Rd @@ -11,7 +11,8 @@ loadGeneSet( enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, cache = NULL, - hostName = "https://www.webgestalt.org/" + hostName = "https://www.webgestalt.org/", + isMultiOmics = FALSE ) } \arguments{ @@ -46,6 +47,8 @@ column is the description of the category. All columns are separated by tabs.} \item{cache}{A directory to save data cache for reuse. Defaults to \code{NULL} and disabled.} \item{hostName}{The server URL for accessing data. Mostly for development purposes.} + +\item{isMultiOmics}{Boolean if loading gene sets for multiomics. Defaults to \code{FALSE}.} } \value{ A list of \code{geneSet}, \code{geneSetDes}, \code{geneSetDag}, \code{geneSetNet}, \code{standardId}. diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index d8241fc..d26d773 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -132,14 +132,14 @@ checksum = "33833971650cade4bfa3097b979506bf2b4934b60392e110f95b94c2406cbb84" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" dependencies = [ "cfg-if", "libc", @@ -166,9 +166,9 @@ checksum = "a34aaa68a201f71eab5df5a67d1326add8aaf029434e939353bcab0534919ff1" [[package]] name = "libc" -version = "0.2.149" +version = "0.2.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" [[package]] name = "libm" @@ -408,22 +408,22 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" -version = "1.0.190" +version = "1.0.192" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7" +checksum = "bca2a08484b285dcb282d0f67b26cadc0df8b19f8c12502c13d966bf9482f001" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.190" +version = "1.0.192" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3" +checksum = "d6c7207fbec9faa48073f3e3074cbe553af6ea512d7c21ba46e434e70ea9fbc1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -465,9 +465,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.38" +version = "2.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" dependencies = [ "proc-macro2", "quote", @@ -523,20 +523,20 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.24" +version = "0.7.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "092cd76b01a033a9965b9097da258689d9e17c69ded5dcf41bca001dd20ebc6d" +checksum = "8cd369a67c0edfef15010f980c3cbe45d7f651deac2cd67ce097cd801de16557" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.24" +version = "0.7.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a13a20a7c6a90e2034bcc65495799da92efcec6a8dd4f3fcb6f7a48988637ead" +checksum = "c2f140bda219a26ccc0cdb03dba58af72590c53b22642577d88a927bc5c87d6b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] From 797034a0d7c06ede15687857c07f7701f832cf58 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 13 Nov 2023 17:17:58 -0600 Subject: [PATCH 73/82] fix news, leading edge, affinity propagation crash fix leading edge for down-regulated pathways, and fix ap cluster crash package crashed if there were no ap clusters. Now it is surrounded by a try catch fix news headers from 'WebGestalt' to 'WebGestaltR' --- NEWS.md | 47 ++++++++++++++++++++++++--------------- R/WebGestaltRGsea.R | 12 ++++++++-- R/WebGestaltRMultiOmics.R | 24 +++++++++++++------- R/gseaEnrichment.R | 29 ++++++++++++------------ R/swGsea.R | 10 ++++----- src/rust/Cargo.lock | 2 +- 6 files changed, 76 insertions(+), 48 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5fdfe79..215169f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,36 +1,47 @@ -# WebGestalt 0.4.6 (2023-05-31) -* Fixed a bug with R 4.3 +# WebGestaltR 0.4.6 (2023-05-31) -# WebGestalt 0.4.5 (2023-02-10) -* Updated default URLs to HTTPS -* Sort results by enrichment ratio for ties with FDR and P-value -* Fixed a bug in reading GMT with quotes -* Fixed a bug in reading GMT from URL without MIME -* Fixed a bug in GSEA plot path when format is SVG +- Fixed a bug with R 4.3 -# WebGestalt 0.4.4 (2020-07-23) -* Fixed a bug in affinity propagation +# WebGestaltR 0.4.5 (2023-02-10) + +- Updated default URLs to HTTPS +- Sort results by enrichment ratio for ties with FDR and P-value +- Fixed a bug in reading GMT with quotes +- Fixed a bug in reading GMT from URL without MIME +- Fixed a bug in GSEA plot path when format is SVG + +# WebGestaltR 0.4.4 (2020-07-23) + +- Fixed a bug in affinity propagation + +# WebGestaltR 0.4.3 (2020-01-16) -# WebGestalt 0.4.3 (2020-01-16) Bug fixes. Add a few advanced options for GSEA. -# WebGestalt 0.4.2 (2019-09-16) +# WebGestaltR 0.4.2 (2019-09-16) + Bug fixes. -# WebGestalt 0.4.1 (2019-07-03) +# WebGestaltR 0.4.1 (2019-07-03) + Add option to save downloaded data in cache. Bug fixes. -# WebGestalt 0.4.0 (2019-04-22) +# WebGestaltR 0.4.0 (2019-04-22) + Support of multiple databases for ORA and GSEA. Adjusted column names of the returned data frame. -# WebGestalt 0.3.1 (2019-03-14) +# WebGestaltR 0.3.1 (2019-03-14) + Bug fixes. The version with NAR update manuscript. -# WebGestalt 0.3.0 (2019-01-16) +# WebGestaltR 0.3.0 (2019-01-16) + Major updates in HTML report and for 2019 publication -# WebGestalt 0.1.1 +# WebGestaltR 0.1.1 + Stable version in WebGestalt 2017 update. -# WebGestalt 0.0.1 +# WebGestaltR 0.0.1 + First submission to CRAN. diff --git a/R/WebGestaltRGsea.R b/R/WebGestaltRGsea.R index 35f0225..1fb24b3 100644 --- a/R/WebGestaltRGsea.R +++ b/R/WebGestaltRGsea.R @@ -135,13 +135,21 @@ WebGestaltRGsea <- function(organism = "hsapiens", enrichDatabase = NULL, enrich kRes <- kMedoid(idsInSet, signedLogP, maxK = kMedoid_k) } if (!is.null(apRes)) { - writeLines(sapply(apRes$clusters, paste, collapse="\t"), file.path(projectDir, paste0("enriched_geneset_ap_clusters_", projectName, ".txt"))) + tryCatch({ + writeLines(sapply(apRes$clusters, paste, collapse="\t"), file.path(projectDir, paste0("enriched_geneset_ap_clusters_", projectName, ".txt"))) + }, + error = function(e){ + cat("Error in writing ap clusters.\n") + }) } else { apRes <- NULL } clusters$ap <- apRes if (!is.null(kRes)) { - writeLines(sapply(kRes$clusters, paste, collapse="\t"), file.path(projectDir, paste0("enriched_geneset_kmedoid_clusters_", projectName, ".txt"))) + tryCatch({ + writeLines(sapply(kRes$clusters, paste, collapse="\t"), file.path(projectDir, paste0("enriched_geneset_kmedoid_clusters_", projectName, ".txt"))) + }, error = function(e) {cat("Error in writing kmedoid clusters.\n")}) + } else { kRes <- NULL } diff --git a/R/WebGestaltRMultiOmics.R b/R/WebGestaltRMultiOmics.R index 74fb387..dd35f0b 100644 --- a/R/WebGestaltRMultiOmics.R +++ b/R/WebGestaltRMultiOmics.R @@ -161,6 +161,9 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, if (length(enrichDatabase) > 1 || length(enrichDatabaseFile) > 1) { stop("Only one enrichDatabase or enrichDatabaseFile can be specified for multiomics.") } + if (length(analyteTypes) == 1) { + stop("Performing multiomics analysis requires multiple analyte types. If you only have one analyte type, use the WebGestaltR(...) function instead.") + } if (enrichMethod == "ORA") { cat("Performing multi-omics ORA\nLoading the functional categories...\n") @@ -169,6 +172,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, geneSet <- all_sets[[1]]$geneSet geneSetDes <- all_sets[[1]]$geneSetDes geneSetNet <- all_sets[[1]]$geneSetNet + geneSetDag <- all_sets[[1]]$geneSetDag for (i in 2:length(all_sets)) { geneSet <- rbind(geneSet, all_sets[[i]]$geneSet) geneSetDes <- rbind(geneSetDes, all_sets[[i]]$geneSetDes) @@ -226,7 +230,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, referenceGeneList <- loadReferenceGene( organism = organism, referenceGeneFile = referenceListFiles, referenceGene = referenceListFiles, referenceGeneType = referenceTypes, - referenceSet = referenceSet, collapseMethod = collapseMethod, + referenceSet = referenceLists, collapseMethod = collapseMethod, hostName = hostName, geneSet = geneSet, interestGeneList = interestGeneList, cache = cache ) @@ -246,6 +250,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, geneSet <- all_sets[[1]]$geneSet geneSetDes <- all_sets[[1]]$geneSetDes geneSetNet <- all_sets[[1]]$geneSetNet + geneSetDag <- all_sets[[1]]$geneSetDag for (i in 2:length(all_sets)) { geneSet <- rbind(geneSet, all_sets[[i]]$geneSet) geneSetDes <- rbind(geneSetDes, all_sets[[i]]$geneSetDes) @@ -293,21 +298,24 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, .load_meta_gmt <- function(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) { - databases <- c() + all_sets <- list(geneSet = list(), geneSetDes = list(), geneSetDag = list(), geneSetNet = list(), standardId = list()) if (!is.null(enrichDatabase)) { # Need to get correct name for metabolite databases if (length(unique(analyteTypes)) == 1) { - databases <- enrichDatabase } else { for (i in seq_along(analyteTypes)) { - databases <- c(databases, get_gmt_file(hostName, analyteTypes[i], enrichDatabase, organism, cache)) + db <- get_gmt_file(hostName, analyteTypes[i], enrichDatabase, organism, cache) + res <- loadGeneSet( + organism = organism, enrichDatabase = db, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, + enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, cache = cache, hostName = hostName, isMultiOmics = TRUE + ) + elements <- names(res) + for (j in seq_along(elements)) { + all_sets[[elements[j]]][[i]] <- res[[elements[j]]] + } } } } else { databases <- NULL } - all_sets <- loadGeneSet( - organism = organism, enrichDatabase = databases, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, - enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, cache = cache, hostName = hostName, isMultiOmics = TRUE - ) return(all_sets) } diff --git a/R/gseaEnrichment.R b/R/gseaEnrichment.R index cd8c0c1..5cbe6ed 100644 --- a/R/gseaEnrichment.R +++ b/R/gseaEnrichment.R @@ -42,7 +42,8 @@ gseaEnrichment <- function(hostName, outputDirectory, projectName, geneRankList, enrichRes <- gseaRes$Enrichment_Results %>% mutate(geneSet = rownames(gseaRes$Enrichment_Results)) %>% - select(.data$geneSet, enrichmentScore=.data$ES, normalizedEnrichmentScore=.data$NES, pValue=.data$p_val, FDR=.data$fdr) + select(.data$geneSet, enrichmentScore=.data$ES, normalizedEnrichmentScore=.data$NES, pValue=.data$p_val, FDR=.data$fdr, + leadingEdgeNum = .data$leading_edge) # TODO: handle errors if (sigMethod == "fdr") { @@ -61,19 +62,19 @@ gseaEnrichment <- function(hostName, outputDirectory, projectName, geneRankList, } if (!is.null(insig)) { - insig$leadingEdgeNum <- unname(sapply(insig$geneSet, function(geneSet) { - rsum <- gseaRes$Running_Sums[, geneSet] # Running sum is a matrix of gene by gene set - maxPeak <- max(rsum) - minPeak <- min(rsum) - if (abs(maxPeak) >= abs(minPeak)) { - peakIndex <- match(max(rsum), rsum) - leadingEdgeNum <- sum(gseaRes$Items_in_Set[[geneSet]]$rank <= peakIndex) - } else { - peakIndex <- match(min(rsum), rsum) - leadingEdgeNum <- sum(gseaRes$Items_in_Set[[geneSet]]$rank >= peakIndex) - } - return(leadingEdgeNum) - })) + # insig$leadingEdgeNum <- unname(sapply(insig$geneSet, function(geneSet) { + # rsum <- gseaRes$Running_Sums[, geneSet] # Running sum is a matrix of gene by gene set + # maxPeak <- max(rsum) + # minPeak <- min(rsum) + # if (abs(maxPeak) >= abs(minPeak)) { + # peakIndex <- match(max(rsum), rsum) + # leadingEdgeNum <- sum(gseaRes$Items_in_Set[[geneSet]]$rank <= peakIndex) + # } else { + # peakIndex <- match(min(rsum), rsum) + # leadingEdgeNum <- sum(gseaRes$Items_in_Set[[geneSet]]$rank >= peakIndex) + # } + # return(leadingEdgeNum) + # })) } plotSuffix <- ifelse("png" %in% plotFormat, "png", "svg") sig <- sig %>% left_join(geneSetName, by="geneSet") %>% diff --git a/R/swGsea.R b/R/swGsea.R index 33e2bcc..e794e63 100644 --- a/R/swGsea.R +++ b/R/swGsea.R @@ -86,12 +86,12 @@ swGsea <- function(input_df, thresh_type = "percentile", thresh = 0.9, thresh_ac stop("please set 'min_set_size' to 3 or greater (default is 5)") } if (max_score != "max") { - if (length(max_score) != (ncol(input_df) - 2) | (!is.numeric(max_score))) { + if (length(max_score) != (ncol(input_df) - 2) || (!is.numeric(max_score))) { stop("max_score needs to be set to max or contain a numeric vector of maximum scores for each set") } } if (min_score != "min") { - if (length(min_score) == (ncol(input_df) - 2) | (!is.numeric(min_score))) { + if (length(min_score) == (ncol(input_df) - 2) || (!is.numeric(min_score))) { stop("min_score needs to be set to min or contain a numeric vector of minimum scores for each set") } } @@ -126,12 +126,12 @@ swGsea <- function(input_df, thresh_type = "percentile", thresh = 0.9, thresh_ac } # if numeric threshold provided, set inset_mat to 1 for items that meet threshold or 0 for those that don't } else if (is.numeric(thresh)) { - if (thresh_type == "values" & length(thresh) == length(enr_test)) { + if (thresh_type == "values" && length(thresh) == length(enr_test)) { for (a in 1:length(thresh)) { items_in_set <- input_df$item[input_df[, enr_test[a]] >= thresh[a]] inset_mat[items_in_set, enr_test[a]] <- 1 } - } else if (thresh_type == "percentile" & thresh > 0 & thresh < 1) { + } else if (thresh_type == "percentile" && thresh > 0 & thresh < 1) { thresh1 <- vector(mode = "numeric", length = length(enr_test)) for (a in 1:length(enr_test)) { thresh1[a] <- quantile(input_df[, enr_test[a]], probs = thresh) @@ -214,7 +214,7 @@ swGsea <- function(input_df, thresh_type = "percentile", thresh = 0.9, thresh_ac rust_ranks <- input_df[, 2] rust_sets <- colnames(inset_mat) rust_result <- gsea_rust(min_set_size, max_set_size, perms, rust_sets, rust_parts, rust_analytes, rust_ranks) - output_df <- data.frame(fdr = rust_result$fdr, p_val = rust_result$p_val, ES = rust_result$ES, NES = rust_result$NES) + output_df <- data.frame(fdr = rust_result$fdr, p_val = rust_result$p_val, ES = rust_result$ES, NES = rust_result$NES, leading_edge = rust_result$leading_edge) rownames(output_df) <- rust_result$gene_sets running_sum <- do.call('cbind', rust_result$running_sum) dimnames(running_sum) <- list(rust_analytes, names(rust_result$running_sum)) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index d26d773..52b9805 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -501,7 +501,7 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "webgestalt_lib" version = "0.1.0" -source = "git+https://github.com/bzhanglab/webgestalt_rust.git#c439dde641285355f1023797fd439ce3bad98cad" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git#5fd6c64238a9f5a39566a95d47786e1290bceb1d" dependencies = [ "ahash", "csv", From 9833623bd077a1aa44ad8200e4dec977d74a42e2 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 14 Nov 2023 11:14:00 -0600 Subject: [PATCH 74/82] fix reading of id lists Multi-omics reference list reading --- .Rbuildignore | 3 ++- R/WebGestaltRMultiOmics.R | 45 ++++++++++++++++++++++++++++----------- R/WebGestaltROra.R | 28 ++++++++++++++++++------ dev_install | 10 +++++---- install | 3 ++- src/rust/src/lib.rs | 2 ++ 6 files changed, 66 insertions(+), 25 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 7c7c2bd..b9af200 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -7,4 +7,5 @@ ^docs$ ^pkgdown$ ^logo\.svg$ -$build_hash\.py$ +^build_hash\.py$ +^\.cargo$ diff --git a/R/WebGestaltRMultiOmics.R b/R/WebGestaltRMultiOmics.R index dd35f0b..806bd1f 100644 --- a/R/WebGestaltRMultiOmics.R +++ b/R/WebGestaltRMultiOmics.R @@ -181,12 +181,12 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, databaseStandardId <- "multiomics" } } else { + databases <- all_sets$databases geneSet <- all_sets$geneSet geneSetDag <- all_sets$geneSetDag geneSetNet <- all_sets$geneSetNet databaseStandardId <- all_sets$standardId } - rm(all_sets) cat("Loading the ID lists...\n") interest_lists <- list() @@ -195,7 +195,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, interestingGeneMap <- loadInterestGene( organism = organism, dataType = "list", inputGeneFile = analyteListFiles[i], inputGene = NULL, geneType = analyteTypes[i], collapseMethod = collapseMethod, cache = cache, - hostName = hostName, geneSet = geneSet + hostName = hostName, geneSet = all_sets$geneSet[[i]] ) if (organism == "others") { interestGeneList <- unique(interestingGeneMap) @@ -211,7 +211,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, interestingGeneMap <- loadInterestGene( organism = organism, dataType = "list", inputGeneFile = NULL, inputGene = analyteLists[i], geneType = analyteTypes[i], collapseMethod = collapseMethod, cache = cache, - hostName = hostName, geneSet = geneSet + hostName = hostName, geneSet = all_sets$geneSet[[i]] ) if (organism == "others") { interestGeneList <- unique(interestingGeneMap) @@ -226,16 +226,34 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, # Load Gene Sets cat("Loading the reference lists...\n") + reference_lists <- list() + if (is.null(referenceLists)) { + for (i in seq_along(referenceListFiles)) { + referenceGeneList <- loadReferenceGene( + organism = organism, referenceGeneFile = referenceListFiles[i], + referenceGene = NULL, referenceGeneType = referenceTypes[i], + referenceSet = NULL, collapseMethod = collapseMethod, + hostName = hostName, geneSet = all_sets$geneSet[[i]], + interestGeneList = interest_lists[[i]], + cache = cache + ) + reference_lists[[i]] <- referenceGeneList + } + } else { + for (i in seq_along(analyteLists)) { + referenceGeneList <- loadReferenceGene( + organism = organism, referenceGeneFile = NULL, + referenceGene = referenceLists[i], referenceGeneType = NULL, + referenceSet = NULL, collapseMethod = collapseMethod, + hostName = hostName, geneSet = all_sets$geneSet[[i]], + interestGeneList = interest_lists[[i]], + cache = cache + ) + reference_lists[[i]] <- referenceGeneList + } + } - referenceGeneList <- loadReferenceGene( - organism = organism, referenceGeneFile = referenceListFiles, - referenceGene = referenceListFiles, referenceGeneType = referenceTypes, - referenceSet = referenceLists, collapseMethod = collapseMethod, - hostName = hostName, geneSet = geneSet, interestGeneList = interestGeneList, - cache = cache - ) - - oraRes <- multiOraEnrichment(interestGeneList, referenceGeneList, geneSet, minNum = minNum, + oraRes <- multiOraEnrichment(interest_lists, reference_lists, geneSet, minNum = minNum, maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr) @@ -298,7 +316,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, .load_meta_gmt <- function(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) { - all_sets <- list(geneSet = list(), geneSetDes = list(), geneSetDag = list(), geneSetNet = list(), standardId = list()) + all_sets <- list(geneSet = list(), geneSetDes = list(), geneSetDag = list(), geneSetNet = list(), standardId = list(), databases = list()) if (!is.null(enrichDatabase)) { # Need to get correct name for metabolite databases if (length(unique(analyteTypes)) == 1) { } else { @@ -312,6 +330,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, for (j in seq_along(elements)) { all_sets[[elements[j]]][[i]] <- res[[elements[j]]] } + all_sets$databases[[i]] <- db } } } else { diff --git a/R/WebGestaltROra.R b/R/WebGestaltROra.R index aa5245c..fb0877e 100644 --- a/R/WebGestaltROra.R +++ b/R/WebGestaltROra.R @@ -25,7 +25,7 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD ################ Check parameter ################ errorTest <- parameterErrorMessage(enrichMethod = enrichMethod, organism = organism, collapseMethod = collapseMethod, minNum = minNum, maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr, reportNum = reportNum, isOutput = isOutput, outputDirectory = outputDirectory, dagColor = dagColor, hostName = hostName, cache = cache) - if(!is.null(enrichDatabase)){ + if(!is.null(enrichDatabase)) { if(enrichDatabase == "all") { all_sets <- listGeneSet(organism = organism, hostName = hostName, cache = cache) all_sets <- all_sets[all_sets$idType == "entrezgene",] @@ -61,7 +61,10 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD ################### Load reference gene set ############## cat("Loading the reference list...\n") - referenceGeneList <- loadReferenceGene(organism = organism, referenceGeneFile = referenceGeneFile, referenceGene = referenceGene, referenceGeneType = referenceGeneType, referenceSet = referenceSet, collapseMethod = collapseMethod, hostName = hostName, geneSet = geneSet, interestGeneList = interestGeneList, cache = cache) + referenceGeneList <- loadReferenceGene(organism = organism, referenceGeneFile = referenceGeneFile, + referenceGene = referenceGene, referenceGeneType = referenceGeneType, + referenceSet = referenceSet, collapseMethod = collapseMethod, hostName = hostName, + geneSet = geneSet, interestGeneList = interestGeneList, cache = cache) ########## Create project folder ############## if (isOutput) { @@ -135,13 +138,13 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD apRes <- NULL wscRes <- NULL kRes <- NULL - if(useAffinityPropagation){ + if(useAffinityPropagation) { apRes <- affinityPropagation(idsInSet, minusLogP) } - if(useWeightedSetCover){ + if(useWeightedSetCover) { wscRes <- weightedSetCover(idsInSet, 1 / minusLogP, setCoverNum, nThreads) } - if(usekMedoid){ + if(usekMedoid) { kRes <- kMedoid(idsInSet, minusLogP, maxK = kMedoid_k) } if (!is.null(apRes)) { @@ -168,7 +171,20 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD if (isOutput) { ############## Create report ################## cat("Generate the final report...\n") - createReport(hostName = hostName, outputDirectory = outputDirectory, organism = organism, projectName = projectName, enrichMethod = enrichMethod, geneSet = geneSet, geneSetDes = geneSetDes, geneSetDag = geneSetDag, geneSetNet = geneSetNet, interestingGeneMap = interestingGeneMap, referenceGeneList = referenceGeneList, enrichedSig = enrichedSig, background = insig, geneTables = geneTables, clusters = clusters, enrichDatabase = enrichDatabase, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, interestGeneFile = interestGeneFile, interestGene = interestGene, interestGeneType = interestGeneType, collapseMethod = collapseMethod, referenceGeneFile = referenceGeneFile, referenceGene = referenceGene, referenceGeneType = referenceGeneType, referenceSet = referenceSet, minNum = minNum, maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr, reportNum = reportNum, dagColor = dagColor) + createReport(hostName = hostName, outputDirectory = outputDirectory, organism = organism, + projectName = projectName, enrichMethod = enrichMethod, geneSet = geneSet, + geneSetDes = geneSetDes, geneSetDag = geneSetDag, geneSetNet = geneSetNet, + interestingGeneMap = interestingGeneMap, referenceGeneList = referenceGeneList, + enrichedSig = enrichedSig, background = insig, geneTables = geneTables, + clusters = clusters, enrichDatabase = enrichDatabase, + enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, + enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, + interestGeneFile = interestGeneFile, interestGene = interestGene, + interestGeneType = interestGeneType, collapseMethod = collapseMethod, + referenceGeneFile = referenceGeneFile, referenceGene = referenceGene, + referenceGeneType = referenceGeneType, referenceSet = referenceSet, minNum = minNum, + maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, + topThr = topThr, reportNum = reportNum, dagColor = dagColor) cwd <- getwd() setwd(projectDir) diff --git a/dev_install b/dev_install index a2a19a9..f3020fd 100755 --- a/dev_install +++ b/dev_install @@ -1,8 +1,10 @@ #!/usr/bin/env bash -cd src/rust -cargo update -cd .. -cd .. +echo "Updating rust dependencies" +( + cd src/rust || exit + cargo update +) +echo "Removing old version" R -e "library(utils) remove.packages('WebGestaltR') q()" diff --git a/install b/install index 03b2203..ac17515 100755 --- a/install +++ b/install @@ -1,8 +1,9 @@ #!/usr/bin/env bash +echo "Removing old version" R -e "library(utils) remove.packages('WebGestaltR') q()" -echo "Old version removed" +printf "Old version removed\nInstalling new version." R -e "library(devtools) rextendr::document() install('.') diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 52d42ae..24f15b8 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -1,3 +1,5 @@ +#![allow(non_snake_case)] +#![allow(clippy::too_many_arguments)] use std::vec; use ahash::{AHashMap, AHashSet}; From 7d3be5376168fc0a4f859fef023ae06a5433b6a7 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Wed, 15 Nov 2023 16:26:22 -0600 Subject: [PATCH 75/82] Metaanalysis ORA working with rust results --- R/WebGestaltRMultiOmics.R | 48 +++++++++++++++++++------------------- R/extendr-wrappers.R | 2 +- R/multiOraEnrichment.R | 6 +++-- man/rust_multiomics_ora.Rd | 6 ++--- src/rust/src/lib.rs | 47 ++++++++++++++++++++----------------- 5 files changed, 57 insertions(+), 52 deletions(-) diff --git a/R/WebGestaltRMultiOmics.R b/R/WebGestaltRMultiOmics.R index 806bd1f..e921a09 100644 --- a/R/WebGestaltRMultiOmics.R +++ b/R/WebGestaltRMultiOmics.R @@ -168,25 +168,25 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, if (enrichMethod == "ORA") { cat("Performing multi-omics ORA\nLoading the functional categories...\n") all_sets <- .load_meta_gmt(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) - if (length(all_sets) > 1) { - geneSet <- all_sets[[1]]$geneSet - geneSetDes <- all_sets[[1]]$geneSetDes - geneSetNet <- all_sets[[1]]$geneSetNet - geneSetDag <- all_sets[[1]]$geneSetDag - for (i in 2:length(all_sets)) { - geneSet <- rbind(geneSet, all_sets[[i]]$geneSet) - geneSetDes <- rbind(geneSetDes, all_sets[[i]]$geneSetDes) - geneSetDag <- rbind(geneSetDag, all_sets[[i]]$geneSetDag) - geneSetNet <- rbind(geneSetNet, all_sets[[i]]$geneSetNet) - databaseStandardId <- "multiomics" - } - } else { - databases <- all_sets$databases - geneSet <- all_sets$geneSet - geneSetDag <- all_sets$geneSetDag - geneSetNet <- all_sets$geneSetNet - databaseStandardId <- all_sets$standardId - } + # if (length(all_sets) > 1) { + # geneSet <- all_sets[[1]]$geneSet + # geneSetDes <- all_sets[[1]]$geneSetDes + # geneSetNet <- all_sets[[1]]$geneSetNet + # geneSetDag <- all_sets[[1]]$geneSetDag + # for (i in 2:length(all_sets)) { + # geneSet <- rbind(geneSet, all_sets[[i]]$geneSet) + # geneSetDes <- rbind(geneSetDes, all_sets[[i]]$geneSetDes) + # geneSetDag <- rbind(geneSetDag, all_sets[[i]]$geneSetDag) + # geneSetNet <- rbind(geneSetNet, all_sets[[i]]$geneSetNet) + # databaseStandardId <- "multiomics" + # } + # } else { + # databases <- all_sets$databases + # geneSet <- all_sets$geneSet + # geneSetDag <- all_sets$geneSetDag + # geneSetNet <- all_sets$geneSetNet + # databaseStandardId <- all_sets$standardId + # } cat("Loading the ID lists...\n") interest_lists <- list() @@ -195,7 +195,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, interestingGeneMap <- loadInterestGene( organism = organism, dataType = "list", inputGeneFile = analyteListFiles[i], inputGene = NULL, geneType = analyteTypes[i], collapseMethod = collapseMethod, cache = cache, - hostName = hostName, geneSet = all_sets$geneSet[[i]] + hostName = hostName, geneSet = all_sets[["geneSet"]][[i]] ) if (organism == "others") { interestGeneList <- unique(interestingGeneMap) @@ -211,7 +211,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, interestingGeneMap <- loadInterestGene( organism = organism, dataType = "list", inputGeneFile = NULL, inputGene = analyteLists[i], geneType = analyteTypes[i], collapseMethod = collapseMethod, cache = cache, - hostName = hostName, geneSet = all_sets$geneSet[[i]] + hostName = hostName, geneSet = all_sets[["geneSet"]][[i]] ) if (organism == "others") { interestGeneList <- unique(interestingGeneMap) @@ -233,7 +233,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, organism = organism, referenceGeneFile = referenceListFiles[i], referenceGene = NULL, referenceGeneType = referenceTypes[i], referenceSet = NULL, collapseMethod = collapseMethod, - hostName = hostName, geneSet = all_sets$geneSet[[i]], + hostName = hostName, geneSet = all_sets[["geneSet"]][[i]], interestGeneList = interest_lists[[i]], cache = cache ) @@ -245,7 +245,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, organism = organism, referenceGeneFile = NULL, referenceGene = referenceLists[i], referenceGeneType = NULL, referenceSet = NULL, collapseMethod = collapseMethod, - hostName = hostName, geneSet = all_sets$geneSet[[i]], + hostName = hostName, geneSet = all_sets[["geneSet"]][[i]], interestGeneList = interest_lists[[i]], cache = cache ) @@ -253,7 +253,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, } } - oraRes <- multiOraEnrichment(interest_lists, reference_lists, geneSet, minNum = minNum, + oraRes <- multiOraEnrichment(interest_lists, reference_lists, all_sets[["geneSet"]], minNum = minNum, maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr) diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index f3ae32f..3e8ad54 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -71,7 +71,7 @@ ora_rust <- function(sets, parts, interest, reference) .Call(wrap__ora_rust, set #' @returns A list of vectors containing the results of ORA, with each list corresponding to each input list #' @author John Elizarraras #' @keywords internal -rust_multiomics_ora <- function(sets, parts, interest, reference, method) .Call(wrap__rust_multiomics_ora, sets, parts, interest, reference, method) +rust_multiomics_ora <- function(sets, big_part_vec, interest, reference, method) .Call(wrap__rust_multiomics_ora, sets, big_part_vec, interest, reference, method) # nolint end diff --git a/R/multiOraEnrichment.R b/R/multiOraEnrichment.R index c1f9c73..6fe7559 100644 --- a/R/multiOraEnrichment.R +++ b/R/multiOraEnrichment.R @@ -45,6 +45,8 @@ multiOraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10 genes <- lapply(geneSet, function(x) { tapply(x$gene, x$geneSet, rbind) }) - rust_result <- rust_multiomics_ora(geneSet, genes, interestGene, referenceGene) - print(head(rust_result)) + modified_geneset <- lapply(genes, function(x) { + names(x) + }) + rust_result <- rust_multiomics_ora(modified_geneset, genes, interestGene, referenceGene, "fisher") } diff --git a/man/rust_multiomics_ora.Rd b/man/rust_multiomics_ora.Rd index 244861d..492ccdb 100644 --- a/man/rust_multiomics_ora.Rd +++ b/man/rust_multiomics_ora.Rd @@ -4,18 +4,18 @@ \alias{rust_multiomics_ora} \title{Run multiomics ORA using Rust library} \usage{ -rust_multiomics_ora(sets, parts, interest, reference, method) +rust_multiomics_ora(sets, big_part_vec, interest, reference, method) } \arguments{ \item{sets}{list of the names of the analyte sets} -\item{parts}{list of the analyte in the analyte sets} - \item{interest}{list of analytes of interest} \item{reference}{list of analytes in the reference set} \item{method}{meta-analysis method to get meta-p values} + +\item{parts}{list of the analyte in the analyte sets} } \value{ A list of vectors containing the results of ORA, with each list corresponding to each input list diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 24f15b8..e365586 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -125,7 +125,7 @@ fn ora_rust(sets: Robj, parts: Robj, interest: Robj, reference: Robj) -> List { /// Run multiomics ORA using Rust library /// @param sets list of the names of the analyte sets -/// @param parts list of the analyte in the analyte sets +/// @param big_part_vec list of the analyte in the analyte sets /// @param interest list of analytes of interest /// @param reference list of analytes in the reference set /// @param method meta-analysis method to get meta-p values @@ -135,7 +135,7 @@ fn ora_rust(sets: Robj, parts: Robj, interest: Robj, reference: Robj) -> List { #[extendr] pub fn rust_multiomics_ora( sets: Robj, - parts: Robj, + big_part_vec: Robj, interest: Robj, reference: Robj, method: Robj, @@ -144,7 +144,8 @@ pub fn rust_multiomics_ora( fdr_method: webgestalt_lib::stat::AdjustmentMethod::None, ..Default::default() }; - let reference_list = reference.as_list().unwrap(); + let parts = big_part_vec.as_list().unwrap(); + let reference_lists = reference.as_list().unwrap(); let method = match method.as_str().unwrap() { "fisher" => webgestalt_lib::methods::multiomics::MultiOmicsMethod::Meta( webgestalt_lib::methods::multiomics::MetaAnalysisMethod::Fisher, @@ -153,26 +154,29 @@ pub fn rust_multiomics_ora( webgestalt_lib::methods::multiomics::MetaAnalysisMethod::Stouffer, ), }; - let mut gmt: Vec = Vec::new(); - let set_vec = sets.as_str_vector().unwrap(); - let parts_vec: Vec> = parts - .as_list() - .unwrap() - .iter() - .map(|(_, x)| x.as_string_vector().unwrap()) - .collect(); - for (i, set) in set_vec.iter().enumerate() { - gmt.push(Item { - id: set.to_string(), - url: String::default(), - parts: parts_vec[i].clone(), - }) - } + let interest_vec = interest.as_list().unwrap(); + let big_set_vec = sets.as_list().unwrap(); let mut jobs: Vec = Vec::new(); - for (i, (_, list)) in interest.as_list().unwrap().into_iter().enumerate() { - let interest_set: AHashSet = AHashSet::from_iter(list.as_string_vector().unwrap()); + for (i, (_, big_set)) in big_set_vec.into_iter().enumerate() { + let mut gmt: Vec = Vec::new(); + let set_vec = big_set.as_str_vector().unwrap(); + let parts_vec: Vec> = parts[i] + .as_list() + .unwrap() + .iter() + .map(|(_, x)| x.as_string_vector().unwrap()) + .collect(); + for (i, set) in set_vec.iter().enumerate() { + gmt.push(Item { + id: set.to_string(), + url: String::default(), + parts: parts_vec[i].clone(), + }) + } + let interest_set: AHashSet = + AHashSet::from_iter(interest_vec[i].as_string_vector().unwrap()); let reference_set: AHashSet = - AHashSet::from_iter(reference_list[i].as_string_vector().unwrap()); + AHashSet::from_iter(reference_lists[i].as_string_vector().unwrap()); let job = ORAJob { gmt: gmt.clone(), interest_list: interest_set.clone(), @@ -181,7 +185,6 @@ pub fn rust_multiomics_ora( }; jobs.push(job) } - let res: Vec> = multiomic_ora(jobs, method); let mut all_res: Vec = Vec::new(); for analysis in res { From 12fc4e4df1e6a150eb95198e09e1c84de5cd281b Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 20 Nov 2023 11:43:20 -0600 Subject: [PATCH 76/82] Update to comments, add multiomics ORA to dataframe added special case for the meta-analysis create output for multiORAEnrichment --- R/WebGestaltROra.R | 80 ++++++++++++++++---------------- R/extendr-wrappers.R | 2 +- R/multiOraEnrichment.R | 93 +++++++++++++++++++++++++++++++++++++- man/rust_multiomics_ora.Rd | 4 +- src/rust/Cargo.lock | 10 ++-- 5 files changed, 141 insertions(+), 48 deletions(-) diff --git a/R/WebGestaltROra.R b/R/WebGestaltROra.R index fb0877e..e07bc07 100644 --- a/R/WebGestaltROra.R +++ b/R/WebGestaltROra.R @@ -4,8 +4,8 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, collapseMethod = "mean", referenceGeneFile = NULL, referenceGene = NULL, referenceGeneType = NULL, referenceSet = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, - topThr = 10, reportNum = 20, setCoverNum = 10, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, - dagColor = "binary", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, + topThr = 10, reportNum = 20, setCoverNum = 10, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, + dagColor = "binary", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 10) { enrichMethod <- "ORA" projectDir <- file.path(outputDirectory, paste0("Project_", projectName)) @@ -22,16 +22,15 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD referenceGene <- testNull(referenceGene) referenceGeneType <- testNull(referenceGeneType) referenceSet <- testNull(referenceSet) - ################ Check parameter ################ errorTest <- parameterErrorMessage(enrichMethod = enrichMethod, organism = organism, collapseMethod = collapseMethod, minNum = minNum, maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr, reportNum = reportNum, isOutput = isOutput, outputDirectory = outputDirectory, dagColor = dagColor, hostName = hostName, cache = cache) - if(!is.null(enrichDatabase)) { - if(enrichDatabase == "all") { - all_sets <- listGeneSet(organism = organism, hostName = hostName, cache = cache) - all_sets <- all_sets[all_sets$idType == "entrezgene",] - enrichDatabase <- all_sets$name - enrichDatabaseType <- all_sets$idType - } + if (!is.null(enrichDatabase)) { + if (enrichDatabase == "all") { + all_sets <- listGeneSet(organism = organism, hostName = hostName, cache = cache) + all_sets <- all_sets[all_sets$idType == "entrezgene", ] + enrichDatabase <- all_sets$name + enrichDatabaseType <- all_sets$idType + } } if (!is.null(errorTest)) { stop(errorTest) @@ -61,10 +60,12 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD ################### Load reference gene set ############## cat("Loading the reference list...\n") - referenceGeneList <- loadReferenceGene(organism = organism, referenceGeneFile = referenceGeneFile, - referenceGene = referenceGene, referenceGeneType = referenceGeneType, - referenceSet = referenceSet, collapseMethod = collapseMethod, hostName = hostName, - geneSet = geneSet, interestGeneList = interestGeneList, cache = cache) + referenceGeneList <- loadReferenceGene( + organism = organism, referenceGeneFile = referenceGeneFile, + referenceGene = referenceGene, referenceGeneType = referenceGeneType, + referenceSet = referenceSet, collapseMethod = collapseMethod, hostName = hostName, + geneSet = geneSet, interestGeneList = interestGeneList, cache = cache + ) ########## Create project folder ############## if (isOutput) { @@ -138,25 +139,25 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD apRes <- NULL wscRes <- NULL kRes <- NULL - if(useAffinityPropagation) { - apRes <- affinityPropagation(idsInSet, minusLogP) + if (useAffinityPropagation) { + apRes <- affinityPropagation(idsInSet, minusLogP) } - if(useWeightedSetCover) { - wscRes <- weightedSetCover(idsInSet, 1 / minusLogP, setCoverNum, nThreads) + if (useWeightedSetCover) { + wscRes <- weightedSetCover(idsInSet, 1 / minusLogP, setCoverNum, nThreads) } - if(usekMedoid) { - kRes <- kMedoid(idsInSet, minusLogP, maxK = kMedoid_k) + if (usekMedoid) { + kRes <- kMedoid(idsInSet, minusLogP, maxK = kMedoid_k) } if (!is.null(apRes)) { - writeLines(sapply(apRes$clusters, paste, collapse="\t"), file.path(projectDir, paste0("enriched_geneset_ap_clusters_", projectName, ".txt"))) + writeLines(sapply(apRes$clusters, paste, collapse = "\t"), file.path(projectDir, paste0("enriched_geneset_ap_clusters_", projectName, ".txt"))) } else { - apRes <- NULL + apRes <- NULL } clusters$ap <- apRes if (!is.null(kRes)) { - writeLines(sapply(kRes$clusters, paste, collapse="\t"), file.path(projectDir, paste0("enriched_geneset_kmedoid_clusters_", projectName, ".txt"))) + writeLines(sapply(kRes$clusters, paste, collapse = "\t"), file.path(projectDir, paste0("enriched_geneset_kmedoid_clusters_", projectName, ".txt"))) } else { - kRes <- NULL + kRes <- NULL } clusters$km <- kRes if (!is.null(wscRes$topSets)) { @@ -171,20 +172,22 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD if (isOutput) { ############## Create report ################## cat("Generate the final report...\n") - createReport(hostName = hostName, outputDirectory = outputDirectory, organism = organism, - projectName = projectName, enrichMethod = enrichMethod, geneSet = geneSet, - geneSetDes = geneSetDes, geneSetDag = geneSetDag, geneSetNet = geneSetNet, - interestingGeneMap = interestingGeneMap, referenceGeneList = referenceGeneList, - enrichedSig = enrichedSig, background = insig, geneTables = geneTables, - clusters = clusters, enrichDatabase = enrichDatabase, - enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, - enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, - interestGeneFile = interestGeneFile, interestGene = interestGene, - interestGeneType = interestGeneType, collapseMethod = collapseMethod, - referenceGeneFile = referenceGeneFile, referenceGene = referenceGene, - referenceGeneType = referenceGeneType, referenceSet = referenceSet, minNum = minNum, - maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, - topThr = topThr, reportNum = reportNum, dagColor = dagColor) + createReport( + hostName = hostName, outputDirectory = outputDirectory, organism = organism, + projectName = projectName, enrichMethod = enrichMethod, geneSet = geneSet, + geneSetDes = geneSetDes, geneSetDag = geneSetDag, geneSetNet = geneSetNet, + interestingGeneMap = interestingGeneMap, referenceGeneList = referenceGeneList, + enrichedSig = enrichedSig, background = insig, geneTables = geneTables, + clusters = clusters, enrichDatabase = enrichDatabase, + enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, + enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, + interestGeneFile = interestGeneFile, interestGene = interestGene, + interestGeneType = interestGeneType, collapseMethod = collapseMethod, + referenceGeneFile = referenceGeneFile, referenceGene = referenceGene, + referenceGeneType = referenceGeneType, referenceSet = referenceSet, minNum = minNum, + maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, + topThr = topThr, reportNum = reportNum, dagColor = dagColor + ) cwd <- getwd() setwd(projectDir) @@ -193,6 +196,5 @@ WebGestaltROra <- function(organism = "hsapiens", enrichDatabase = NULL, enrichD cat("Results can be found in the ", projectDir, "!\n", sep = "") } - return(outputEnrichedSig) } diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 3e8ad54..b265450 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -64,7 +64,7 @@ ora_rust <- function(sets, parts, interest, reference) .Call(wrap__ora_rust, set #' Run multiomics ORA using Rust library #' @param sets list of the names of the analyte sets -#' @param parts list of the analyte in the analyte sets +#' @param big_part_vec list of the analyte in the analyte sets #' @param interest list of analytes of interest #' @param reference list of analytes in the reference set #' @param method meta-analysis method to get meta-p values diff --git a/R/multiOraEnrichment.R b/R/multiOraEnrichment.R index 6fe7559..729df0c 100644 --- a/R/multiOraEnrichment.R +++ b/R/multiOraEnrichment.R @@ -1,3 +1,5 @@ +#' @importFrom dplyr filter select left_join mutate arrange %>% group_by inner_join +#' @importFrom stats p.adjust phyper multiOraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10) { @@ -22,10 +24,19 @@ multiOraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10 refG <- list() intG <- list() + met_intG <- data.frame() for (i in seq_along(geneSetNum)) { refG[[i]] <- data.frame(geneSet = names(geneSetNum[[i]]), size = as.numeric(geneSetNum[[i]]), stringsAsFactors = FALSE) - intG[[i]] <- filter(geneSet[[i]], .data$gene %in% interestGene) + intG[[i]] <- filter(geneSet[[i]], .data$gene %in% interestGene[[i]]) + if (i == 1) { + met_intG <- intG[[i]] + } else { + met_intG <- rbind(met_intG, intG[[i]]) + } } + met_intG <- distinct(met_intG) + met_intG <- tapply(met_intG$gene, met_intG$geneSet, paste, collapse = ";") + met_intG <- data.frame(geneSet = names(met_intG), overlapId = as.character(met_intG), stringsAsFactors = FALSE) intGId <- lapply(intG, function(x) { tapply(x$gene, x$geneSet, paste, collapse = ";") }) @@ -49,4 +60,84 @@ multiOraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10 names(x) }) rust_result <- rust_multiomics_ora(modified_geneset, genes, interestGene, referenceGene, "fisher") + rust_result_df <- lapply(rust_result, function(x) { + data.frame( + FDR = p.adjust(x$p, method = fdrMethod), pValue = x$p, expect = x$expect, + enrichmentRatio = x$enrichment_ratio, geneSet = x$gene_set, overlap = x$overlap + ) + }) + enrichedResultList <- list() + backgroundList <- list() + for (i in seq_along(rust_result_df)) { + if (i == 1) { # Meta-analysis + enrichedResult <- rust_result_df[[i]] %>% + left_join(met_intG, by = "geneSet") %>% # get overlapping gene IDs + arrange(.data$FDR, .data$pValue, .data$enrichmentRatio) + enrichedResult$overlap <- sapply(enrichedResult$overlapId, function(x) { + length(unlist(strsplit(x, ";"))) + }) + if (sigMethod == "fdr") { + enrichedResultSig <- filter(enrichedResult, .data$FDR < fdrThr) + if (nrow(enrichedResultSig) == 0) { + warning("No significant gene set is identified based on FDR ", fdrThr, "!") + enrichedResultList[[i]] <- NULL + backgroundList[[i]] <- NULL + } else { + enrichedResultInsig <- enrichedResult %>% + filter(.data$FDR >= fdrThr, .data$overlap != 0) %>% + select(.data$geneSet, .data$enrichmentRatio, .data$FDR, .data$overlap) + enrichedResultList[[i]] <- enrichedResultSig + backgroundList[[i]] <- enrichedResultInsig + } + } else { + # for the top method, we only select the terms with at least one annotated interesting gene + enrichedResult <- enrichedResult %>% filter(.data$overlap != 0) + if (nrow(enrichedResult) > topThr) { + enrichedResultSig <- enrichedResult[1:topThr, ] + enrichedResultInsig <- enrichedResult[(topThr + 1):nrow(enrichedResult), c("geneSet", "enrichmentRatio", "FDR", "overlap")] + } else { + enrichedResultSig <- enrichedResult + enrichedResultInsig <- data.frame() + } + enrichedResultList[[i]] <- enrichedResultSig + backgroundList[[i]] <- enrichedResultInsig + } + } else { + enrichedResult <- geneSetFilter[[i - 1]] %>% + left_join(refG[[i - 1]], by = "geneSet") %>% + left_join( + rust_result_df[[i - 1]], + by = "geneSet", + ) %>% + left_join(intGId[[i - 1]], by = "geneSet") %>% # get overlapping gene IDs + arrange(.data$FDR, .data$pValue, .data$enrichmentRatio) + if (sigMethod == "fdr") { + enrichedResultSig <- filter(enrichedResult, .data$FDR < fdrThr) + if (nrow(enrichedResultSig) == 0) { + warning("No significant gene set is identified based on FDR ", fdrThr, "!") + enrichedResultList[[i]] <- NULL + backgroundList[[i]] <- NULL + } else { + enrichedResultInsig <- enrichedResult %>% + filter(.data$FDR >= fdrThr, .data$overlap != 0) %>% + select(.data$geneSet, .data$enrichmentRatio, .data$FDR, .data$overlap) + enrichedResultList[[i]] <- enrichedResultSig + backgroundList[[i]] <- enrichedResultInsig + } + } else { + # for the top method, we only select the terms with at least one annotated interesting gene + enrichedResult <- enrichedResult %>% filter(.data$overlap != 0) + if (nrow(enrichedResult) > topThr) { + enrichedResultSig <- enrichedResult[1:topThr, ] + enrichedResultInsig <- enrichedResult[(topThr + 1):nrow(enrichedResult), c("geneSet", "enrichmentRatio", "FDR", "overlap")] + } else { + enrichedResultSig <- enrichedResult + enrichedResultInsig <- data.frame() + } + enrichedResultList[[i]] <- enrichedResultSig + backgroundList[[i]] <- enrichedResultInsig + } + } + } + return(list(enriched = enrichedResultList, background = backgroundList)) } diff --git a/man/rust_multiomics_ora.Rd b/man/rust_multiomics_ora.Rd index 492ccdb..b641507 100644 --- a/man/rust_multiomics_ora.Rd +++ b/man/rust_multiomics_ora.Rd @@ -9,13 +9,13 @@ rust_multiomics_ora(sets, big_part_vec, interest, reference, method) \arguments{ \item{sets}{list of the names of the analyte sets} +\item{big_part_vec}{list of the analyte in the analyte sets} + \item{interest}{list of analytes of interest} \item{reference}{list of analytes in the reference set} \item{method}{meta-analysis method to get meta-p values} - -\item{parts}{list of the analyte in the analyte sets} } \value{ A list of vectors containing the results of ORA, with each list corresponding to each input list diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 52b9805..d7fccd2 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -501,7 +501,7 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "webgestalt_lib" version = "0.1.0" -source = "git+https://github.com/bzhanglab/webgestalt_rust.git#5fd6c64238a9f5a39566a95d47786e1290bceb1d" +source = "git+https://github.com/bzhanglab/webgestalt_rust.git#c3ac1792d892c17989a01b39e291e83fed300aaf" dependencies = [ "ahash", "csv", @@ -523,18 +523,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.25" +version = "0.7.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cd369a67c0edfef15010f980c3cbe45d7f651deac2cd67ce097cd801de16557" +checksum = "e97e415490559a91254a2979b4829267a57d2fcd741a98eee8b722fb57289aa0" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.25" +version = "0.7.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2f140bda219a26ccc0cdb03dba58af72590c53b22642577d88a927bc5c87d6b" +checksum = "dd7e48ccf166952882ca8bd778a43502c64f33bf94c12ebe2a7f08e5a0f6689f" dependencies = [ "proc-macro2", "quote", From b18dcbe923dac443d44983a5857f2475ddddb680 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Tue, 21 Nov 2023 11:12:28 -0600 Subject: [PATCH 77/82] generate reports for ORA individual runs --- R/WebGestaltRMultiOmics.R | 105 +-------- R/WebGestaltRMultiomicsOra.R | 171 ++++++++++++++ R/loadGeneSet.R | 390 ++++++++++++++++---------------- R/multiOraEnrichment.R | 6 +- man/WebGestaltRMultiOmicsOra.Rd | 167 ++++++++++++++ 5 files changed, 555 insertions(+), 284 deletions(-) create mode 100644 R/WebGestaltRMultiomicsOra.R create mode 100644 man/WebGestaltRMultiOmicsOra.Rd diff --git a/R/WebGestaltRMultiOmics.R b/R/WebGestaltRMultiOmics.R index e921a09..3a1a262 100644 --- a/R/WebGestaltRMultiOmics.R +++ b/R/WebGestaltRMultiOmics.R @@ -103,7 +103,6 @@ #' @param mergeMethod The method to merge the results from multiple omics (options: \code{mean}, \code{max}). Only used if \code{isMetaAnalysis = FALSE}. Defaults to \code{mean}. #' @param normalizationMethod The method to normalize the results from multiple omics (options: \code{rank}, \code{median}, \code{mean}). Only used if \code{isMetaAnalysis = FALSE}. #' @param kMedoid_k The number of clusters for k-medoid. Defaults to \code{25}. -#' #' @export WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, analyteTypes = NULL, enrichMethod = "ORA", organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, @@ -138,6 +137,10 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, if (!is.null(error_msg)) { stop(error_msg) } + if (is.null(projectName)) { + projectName <- as.character(as.integer(Sys.time())) + } + projectName <- sanitizeFileName(projectName) # use for GOSlim summary file name, convert punct to _ # Verify parameters mergeMethod <- tolower(mergeMethod) @@ -166,98 +169,14 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, } if (enrichMethod == "ORA") { - cat("Performing multi-omics ORA\nLoading the functional categories...\n") - all_sets <- .load_meta_gmt(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) - # if (length(all_sets) > 1) { - # geneSet <- all_sets[[1]]$geneSet - # geneSetDes <- all_sets[[1]]$geneSetDes - # geneSetNet <- all_sets[[1]]$geneSetNet - # geneSetDag <- all_sets[[1]]$geneSetDag - # for (i in 2:length(all_sets)) { - # geneSet <- rbind(geneSet, all_sets[[i]]$geneSet) - # geneSetDes <- rbind(geneSetDes, all_sets[[i]]$geneSetDes) - # geneSetDag <- rbind(geneSetDag, all_sets[[i]]$geneSetDag) - # geneSetNet <- rbind(geneSetNet, all_sets[[i]]$geneSetNet) - # databaseStandardId <- "multiomics" - # } - # } else { - # databases <- all_sets$databases - # geneSet <- all_sets$geneSet - # geneSetDag <- all_sets$geneSetDag - # geneSetNet <- all_sets$geneSetNet - # databaseStandardId <- all_sets$standardId - # } - - cat("Loading the ID lists...\n") - interest_lists <- list() - if (is.null(analyteLists)) { - for (i in seq_along(analyteListFiles)) { - interestingGeneMap <- loadInterestGene( - organism = organism, dataType = "list", inputGeneFile = analyteListFiles[i], inputGene = NULL, - geneType = analyteTypes[i], collapseMethod = collapseMethod, cache = cache, - hostName = hostName, geneSet = all_sets[["geneSet"]][[i]] - ) - if (organism == "others") { - interestGeneList <- unique(interestingGeneMap) - interest_lists[[i]] <- interestGeneList - } else { - interestStandardId <- interestingGeneMap$standardId - interestGeneList <- unique(interestingGeneMap$mapped[[interestStandardId]]) - interest_lists[[i]] <- interestGeneList - } - } - } else { - for (i in seq_along(analyteLists)) { - interestingGeneMap <- loadInterestGene( - organism = organism, dataType = "list", inputGeneFile = NULL, inputGene = analyteLists[i], - geneType = analyteTypes[i], collapseMethod = collapseMethod, cache = cache, - hostName = hostName, geneSet = all_sets[["geneSet"]][[i]] - ) - if (organism == "others") { - interestGeneList <- unique(interestingGeneMap) - interest_lists[[i]] <- interestGeneList - } else { - interestStandardId <- interestingGeneMap$standardId - interestGeneList <- unique(interestingGeneMap$mapped[[interestStandardId]]) - interest_lists[[i]] <- interestGeneList - } - } - } - - # Load Gene Sets - cat("Loading the reference lists...\n") - reference_lists <- list() - if (is.null(referenceLists)) { - for (i in seq_along(referenceListFiles)) { - referenceGeneList <- loadReferenceGene( - organism = organism, referenceGeneFile = referenceListFiles[i], - referenceGene = NULL, referenceGeneType = referenceTypes[i], - referenceSet = NULL, collapseMethod = collapseMethod, - hostName = hostName, geneSet = all_sets[["geneSet"]][[i]], - interestGeneList = interest_lists[[i]], - cache = cache - ) - reference_lists[[i]] <- referenceGeneList - } - } else { - for (i in seq_along(analyteLists)) { - referenceGeneList <- loadReferenceGene( - organism = organism, referenceGeneFile = NULL, - referenceGene = referenceLists[i], referenceGeneType = NULL, - referenceSet = NULL, collapseMethod = collapseMethod, - hostName = hostName, geneSet = all_sets[["geneSet"]][[i]], - interestGeneList = interest_lists[[i]], - cache = cache - ) - reference_lists[[i]] <- referenceGeneList - } - } - - oraRes <- multiOraEnrichment(interest_lists, reference_lists, all_sets[["geneSet"]], minNum = minNum, - maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, - fdrThr = fdrThr, topThr = topThr) - - + WebGestaltRMultiOmicsOra(analyteLists = analyteLists, analyteListFiles = analyteListFiles, analyteTypes = analyteTypes, organism = organism, + enrichDatabase = enrichDatabase, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, + enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, collapseMethod = collapseMethod, minNum = minNum, + maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr, reportNum = reportNum, + setCoverNum = setCoverNum, perNum = perNum, isOutput = isOutput, outputDirectory = outputDirectory, projectName = projectName, + dagColor = dagColor, nThreads = nThreads, cache = cache, hostName = hostName, useWeightedSetCover = useWeightedSetCover, + useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid, kMedoid_k = kMedoid_k, referenceLists = referenceLists, + referenceListFiles = referenceListFiles, referenceTypes = referenceTypes) ## Meta-analysis } else if (enrichMethod == "GSEA") { if (isMetaAnalysis) { diff --git a/R/WebGestaltRMultiomicsOra.R b/R/WebGestaltRMultiomicsOra.R new file mode 100644 index 0000000..22fda21 --- /dev/null +++ b/R/WebGestaltRMultiomicsOra.R @@ -0,0 +1,171 @@ +#' @title Multi-omics ORA +#' @inheritParams WebGestaltRMultiOmics +WebGestaltRMultiOmicsOra <- function(analyteLists = NULL, analyteListFiles = NULL, analyteTypes = NULL, enrichMethod = "ORA", organism = "hsapiens", + enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, + collapseMethod = "mean", minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, + topThr = 10, reportNum = 100, setCoverNum = 10, perNum = 1000, gseaP = 1, isOutput = TRUE, outputDirectory = getwd(), + projectName = NULL, dagColor = "binary", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", + useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 25, + referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL) { + projectDir <- file.path(outputDirectory, paste0("Project_", projectName)) + cat("Performing multi-omics ORA\nLoading the functional categories...\n") + all_sets <- .load_meta_gmt(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) + cat("Loading the ID lists...\n") + interest_lists <- list() + interestGeneMaps <- list() + if (is.null(analyteLists)) { + for (i in seq_along(analyteListFiles)) { + interestingGeneMap <- loadInterestGene( + organism = organism, dataType = "list", inputGeneFile = analyteListFiles[i], inputGene = NULL, + geneType = analyteTypes[i], collapseMethod = collapseMethod, cache = cache, + hostName = hostName, geneSet = all_sets[["geneSet"]][[i]] + ) + interestGeneMaps[[i]] <- interestingGeneMap + if (organism == "others") { + interestGeneList <- unique(interestingGeneMap) + interest_lists[[i]] <- interestGeneList + } else { + interestStandardId <- interestingGeneMap$standardId + interestGeneList <- unique(interestingGeneMap$mapped[[interestStandardId]]) + interest_lists[[i]] <- interestGeneList + } + } + } else { + for (i in seq_along(analyteLists)) { + interestingGeneMap <- loadInterestGene( + organism = organism, dataType = "list", inputGeneFile = NULL, inputGene = analyteLists[i], + geneType = analyteTypes[i], collapseMethod = collapseMethod, cache = cache, + hostName = hostName, geneSet = all_sets[["geneSet"]][[i]] + ) + interestGeneMaps[[i]] <- interestingGeneMap + if (organism == "others") { + interestGeneList <- unique(interestingGeneMap) + interest_lists[[i]] <- interestGeneList + } else { + interestStandardId <- interestingGeneMap$standardId + interestGeneList <- unique(interestingGeneMap$mapped[[interestStandardId]]) + interest_lists[[i]] <- interestGeneList + } + } + } + + # Load Gene Sets + cat("Loading the reference lists...\n") + reference_lists <- list() + if (is.null(referenceLists)) { + for (i in seq_along(referenceListFiles)) { + referenceGeneList <- loadReferenceGene( + organism = organism, referenceGeneFile = referenceListFiles[i], + referenceGene = NULL, referenceGeneType = referenceTypes[i], + referenceSet = NULL, collapseMethod = collapseMethod, + hostName = hostName, geneSet = all_sets[["geneSet"]][[i]], + interestGeneList = interest_lists[[i]], + cache = cache + ) + reference_lists[[i]] <- referenceGeneList + } + } else { + for (i in seq_along(analyteLists)) { + referenceGeneList <- loadReferenceGene( + organism = organism, referenceGeneFile = NULL, + referenceGene = referenceLists[i], referenceGeneType = NULL, + referenceSet = NULL, collapseMethod = collapseMethod, + hostName = hostName, geneSet = all_sets[["geneSet"]][[i]], + interestGeneList = interest_lists[[i]], + cache = cache + ) + reference_lists[[i]] <- referenceGeneList + } + } + cat("Running multi-omics ORA...\n") + oraRes <- multiOraEnrichment(interest_lists, reference_lists, all_sets[["geneSet"]], + minNum = minNum, maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, + fdrThr = fdrThr, topThr = topThr + ) + if (is.null(oraRes)) { + return(NULL) + } + cat("Generating the report...\n") + dir.create(projectDir, showWarnings = FALSE) + for (i in 2:length(oraRes$enriched)) { + interestingGeneMap <- interestGeneMaps[[i - 1]] + enrichedSig <- oraRes$enriched[[i]] + insig <- oraRes$background[[i]] + # geneSetDag <- all_sets[["geneSetDag"]][[i - 1]] + geneSetDes <- all_sets[["geneSetDes"]][[i - 1]] + geneSet <- all_sets[["geneSet"]][[i - 1]] + clusters <- list() + geneTables <- list() + + if (!is.null(enrichedSig)) { + if (!is.null(geneSetDes)) { ####### Add extra description information ########### + enrichedSig <- enrichedSig %>% + left_join(geneSetDes, by = "geneSet") %>% + select(.data$geneSet, .data$description, .data$link, .data$size, .data$overlap, .data$expect, .data$enrichmentRatio, .data$pValue, .data$FDR, .data$overlapId) %>% + arrange(.data$FDR, .data$pValue, desc(.data$size)) %>% + mutate(description = ifelse(is.na(.data$description), "", .data$description)) # now des could be mixture + } else { + enrichedSig <- enrichedSig %>% + select(.data$geneSet, .data$link, .data$size, .data$overlap, .data$expect, .data$enrichmentRatio, .data$pValue, .data$FDR, .data$overlapId) %>% + arrange(.data$FDR, .data$pValue, desc(.data$size)) + } + + geneTables <- getGeneTables(organism, enrichedSig, "overlapId", interestingGeneMap) + if (organism != "others") { + enrichedSig$link <- mapply( + function(link, geneList) linkModification("ORA", link, geneList, interestingGeneMap, hostName), + enrichedSig$link, + enrichedSig$overlapId + ) + } + + if ("database" %in% colnames(geneSet)) { + # Add source database for multiple databases + enrichedSig <- enrichedSig %>% left_join(unique(geneSet[, c("geneSet", "database")]), by = "geneSet") + } + if (organism != "others" && analyteTypes[[i - 1]] != interestStandardId) { + outputEnrichedSig <- mapUserId(enrichedSig, "overlapId", interestingGeneMap) + } else { + outputEnrichedSig <- enrichedSig + } + + if (isOutput) { + write_tsv(outputEnrichedSig, file.path(projectDir, paste0("enrichment_results_", projectName, "_list", i - 1, ".txt"))) + idsInSet <- sapply(enrichedSig$overlapId, strsplit, split = ";") + names(idsInSet) <- enrichedSig$geneSet + minusLogP <- -log(enrichedSig$pValue) + minusLogP[minusLogP == Inf] <- -log(.Machine$double.eps) + apRes <- NULL + wscRes <- NULL + kRes <- NULL + if (useAffinityPropagation) { + apRes <- affinityPropagation(idsInSet, minusLogP) + } + if (useWeightedSetCover) { + wscRes <- weightedSetCover(idsInSet, 1 / minusLogP, setCoverNum, nThreads) + } + if (usekMedoid) { + kRes <- kMedoid(idsInSet, minusLogP, maxK = kMedoid_k) + } + if (!is.null(apRes)) { + writeLines(sapply(apRes$clusters, paste, collapse = "\t"), file.path(projectDir, paste0("enriched_geneset_ap_clusters_", projectName, "_list", i - 1, ".txt"))) + } else { + apRes <- NULL + } + clusters$ap <- apRes + if (!is.null(kRes)) { + writeLines(sapply(kRes$clusters, paste, collapse = "\t"), file.path(projectDir, paste0("enriched_geneset_kmedoid_clusters_", projectName, "_list", i - 1, ".txt"))) + } else { + kRes <- NULL + } + clusters$km <- kRes + if (!is.null(wscRes$topSets)) { + writeLines(c(paste0("# Coverage: ", wscRes$coverage), wscRes$topSets), file.path(projectDir, paste0("enriched_geneset_wsc_topsets_", projectName, "_list", i - 1, ".txt"))) + clusters$wsc <- list(representatives = wscRes$topSets, coverage = wscRes$coverage) + } else { + clusters$wsc <- NULL + } + } + } + } +} diff --git a/R/loadGeneSet.R b/R/loadGeneSet.R index 01b7b4a..24e3be7 100644 --- a/R/loadGeneSet.R +++ b/R/loadGeneSet.R @@ -14,202 +14,214 @@ #' @importFrom dplyr select distinct filter %>% #' @importFrom httr modify_url #' @export -loadGeneSet <- function(organism="hsapiens", enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, - cache=NULL, hostName="https://www.webgestalt.org/", isMultiOmics = FALSE) { - # TODO: multiple custom database ID types? - geneSet <- NULL ##gene sets - geneSetDes <- NULL ##gene set description file - geneSetDag <- list() ##gene set DAG file - geneSetNet <- list() ##gene set network file - standardId <- NULL - - if (organism != "others" && !is.null(enrichDatabaseFile) && is.null(enrichDatabaseType)) { - stop("The ID type should be given in enrichDatabaseType for custom GMT files, e.g. genesymbol.") - } - # necessary because loop with length used below. enrichDatabase will skip NULL - if (!is.vector(enrichDatabaseFile)) { - enrichDatabaseFile = list(enrichDatabaseFile) - } - if (!is.vector(enrichDatabaseDescriptionFile)) { - enrichDatabaseDescriptionFile = list(enrichDatabaseDescriptionFile) - } - if (length(enrichDatabaseFile) != length(enrichDatabaseDescriptionFile)) { - stop("The number of custom database and its description files should be equal. Use NULL for placeholder.") - } - if (organism != "others") { # supported organism - geneSetInfo <- listGeneSet(organism=organism, hostName=hostName, cache=cache) - # load build-in databases - for (enrichDb in enrichDatabase) { - if (is.null(enrichDb) || enrichDb == "others") { next } # just for backward compatibility - if (!enrichDb %in% geneSetInfo$name) { - warning("Database ", enrichDb, " is not supported") - next - } - # get the ID type of the enriched database, such as entrezgene or phosphsiteSeq - thisStandardId <- filter(geneSetInfo, .data$name==enrichDb)[[1, "idType"]] - if (!is.null(standardId) && standardId != thisStandardId && !isMultiOmics) { - stop("Databases have inconsistent ID types. Mixed gene annotation databases with phosphosite databases?") - } - standardId <- thisStandardId - - #########Read GMT file from the existing database########### - if (startsWith(hostName, "file://")) { - gmtPath <- removeFileProtocol(file.path(hostName, "geneset", paste0(paste(organism, enrichDb, standardId, sep="_"), ".gmt"))) - thisGeneSet <- readGmt(gmtPath) - thisGeneSet$database <- enrichDb # add a column for database source - geneSet <- rbind(geneSet, thisGeneSet) - } else { - gmtUrl <- modify_url(file.path(hostName, "api", "geneset"), query=list(organism=organism, database=enrichDb, standardId=standardId, fileType="gmt")) - thisGeneSet <- readGmt(gmtUrl, cache=cache) - thisGeneSet$database <- enrichDb - oriCnt <- nrow(thisGeneSet) - thisGeneSet <- thisGeneSet %>% filter(!(.data$geneSet %in% unique(!!geneSet$geneSet))) - if (nrow(thisGeneSet) < oriCnt) { - warning(paste("Duplicate gene set names in", enrichDb, "have been ignored.")) - } - geneSet <- rbind(geneSet, thisGeneSet) - } - - #########Read the description file############# - #geneSetDes <- rbind(geneSetDes, .loadGeneSetData(hostName, organism, enrichDb, standardId, "des", cache)) - thisGeneSetDes <- .loadGeneSetData(hostName, organism, enrichDb, standardId, "des", cache) - if (!is.null(thisGeneSetDes) && !is.null(geneSetDes)) { - thisGeneSetDes <- thisGeneSetDes %>% filter(!(.data$geneSet %in% unique(!!geneSetDes$geneSet))) - } - geneSetDes <- rbind(geneSetDes, thisGeneSetDes) - - ###########Try to load the DAG file################# - # assignment considering possible return of NULL - # list[] <- NULL will delete the element - geneSetDag[enrichDb] <- list(.loadGeneSetData(hostName, organism, enrichDb, standardId, "dag", cache)) - - ###########Try to load the network file if the gene sets are generated from the network########## - geneSetNet[enrichDb] <- list(.loadGeneSetData(hostName, organism, enrichDb, standardId, "net", cache)) - } - - # load local database files - for (i in 1:length(enrichDatabaseFile)) { - enrichDbFile <- enrichDatabaseFile[[i]] - if (is.null(enrichDbFile)) { next } - - thisGeneSet <- idMapping(organism=organism, dataType="gmt", inputGeneFile=enrichDbFile, sourceIdType=enrichDatabaseType, targetIdType=NULL, mappingOutput=FALSE, cache=cache, hostName=hostName) - thisStandardId <- thisGeneSet$standardId # should be just enrichDatabaseType here - if (!is.null(standardId) && standardId != thisStandardId && !isMultiOmics) { - stop("Databases have inconsistent ID types. Mixed gene annotation databases with phosphosite databases?") - } - standardId <- thisStandardId - - thisGeneSet <- thisGeneSet$mapped %>% select(.data$geneSet, .data$description, gene=standardId) %>% distinct() - - # load local description files - enrichDbDesFile <- enrichDatabaseDescriptionFile[[i]] - if (!is.null(enrichDbDesFile)) { - thisGeneSetDes <- .loadEnrichDatabaseDescriptionFile(thisGeneSet, enrichDbDesFile) - if (!is.null(thisGeneSetDes) && !is.null(geneSetDes)) { - thisGeneSetDes <- thisGeneSetDes %>% filter(!(.data$geneSet %in% unique(!!geneSetDes$geneSet))) - } - geneSetDes <- rbind(geneSetDes, thisGeneSetDes) - } - - fileName <- gsub(".gmt", "", basename(enrichDbFile), fixed=TRUE) - thisGeneSet$database <- fileName - oriCnt <- nrow(thisGeneSet) - thisGeneSet <- thisGeneSet %>% filter(!(.data$geneSet %in% unique(!!geneSet$geneSet))) - if (nrow(thisGeneSet) < oriCnt) { - warning(paste("Duplicate gene set names in", fileName, "have been ignored.")) - } - geneSet <- rbind(geneSet, thisGeneSet) - - geneSetDag[fileName] <- list(NULL) # correct way to assign NULL to list element - geneSetNet[fileName] <- list(NULL) - } - } else { # custom organisms - for (i in 1:length(enrichDatabaseFile)) { - enrichDbFile <- enrichDatabaseFile[[i]] - if (is.null(enrichDbFile)) { next } - thisGeneSet <- readGmt(enrichDbFile) - - enrichDbDesFile <- enrichDatabaseDescriptionFile[[i]] - if (!is.null(enrichDbDesFile)) { - thisGeneSetDes <- .loadEnrichDatabaseDescriptionFile(thisGeneSet, enrichDbDesFile) - if (!is.null(thisGeneSetDes) && !is.null(geneSetDes)) { - thisGeneSetDes <- thisGeneSetDes %>% filter(!(.data$geneSet %in% unique(!!geneSetDes$geneSet))) - } - geneSetDes <- rbind(geneSetDes, thisGeneSetDes) - } - fileName <- gsub(".gmt", "", basename(enrichDbFile), fixed=TRUE) - thisGeneSet$database <- fileName - oriCnt <- nrow(thisGeneSet) - thisGeneSet <- thisGeneSet %>% filter(!(.data$geneSet %in% unique(!!geneSet$geneSet))) - if (nrow(thisGeneSet) < oriCnt) { - warning(paste("Duplicate gene set names in", enrichDb, "have been ignored.")) - } - geneSet <- rbind(geneSet, thisGeneSet) - - geneSetDag[fileName] <- list(NULL) - geneSetNet[fileName] <- list(NULL) - } - } - if (is.null(geneSet)) { stop(enrichDatabaseError(type="empty")) } - - # unlist if just one database, for backward compatibility - if (length(geneSetDag) == 1) { geneSetDag <- geneSetDag[[1]] } - if (length(geneSetNet) == 1) { geneSetNet <- geneSetNet[[1]] } - - if (length(unique(geneSet$database)) == 1) { - # remove database column for single source - geneSet$database<- NULL - } - re <- list(geneSet=geneSet, geneSetDes=geneSetDes, geneSetDag=geneSetDag, geneSetNet=geneSetNet,standardId=standardId) - return(re) +loadGeneSet <- function(organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, + cache = NULL, hostName = "https://www.webgestalt.org/", isMultiOmics = FALSE) { + # TODO: multiple custom database ID types? + geneSet <- NULL ## gene sets + geneSetDes <- NULL ## gene set description file + geneSetDag <- list() ## gene set DAG file + geneSetNet <- list() ## gene set network file + standardId <- NULL + + if (organism != "others" && !is.null(enrichDatabaseFile) && is.null(enrichDatabaseType)) { + stop("The ID type should be given in enrichDatabaseType for custom GMT files, e.g. genesymbol.") + } + # necessary because loop with length used below. enrichDatabase will skip NULL + if (!is.vector(enrichDatabaseFile)) { + enrichDatabaseFile <- list(enrichDatabaseFile) + } + if (!is.vector(enrichDatabaseDescriptionFile)) { + enrichDatabaseDescriptionFile <- list(enrichDatabaseDescriptionFile) + } + if (length(enrichDatabaseFile) != length(enrichDatabaseDescriptionFile)) { + stop("The number of custom database and its description files should be equal. Use NULL for placeholder.") + } + if (organism != "others") { # supported organism + geneSetInfo <- listGeneSet(organism = organism, hostName = hostName, cache = cache) + # load build-in databases + for (enrichDb in enrichDatabase) { + if (is.null(enrichDb) || enrichDb == "others") { + next + } # just for backward compatibility + if (!enrichDb %in% geneSetInfo$name) { + warning("Database ", enrichDb, " is not supported") + next + } + # get the ID type of the enriched database, such as entrezgene or phosphsiteSeq + thisStandardId <- filter(geneSetInfo, .data$name == enrichDb)[[1, "idType"]] + if (!is.null(standardId) && standardId != thisStandardId && !isMultiOmics) { + stop("Databases have inconsistent ID types. Mixed gene annotation databases with phosphosite databases?") + } + standardId <- thisStandardId + + ######### Read GMT file from the existing database########### + if (startsWith(hostName, "file://")) { + gmtPath <- removeFileProtocol(file.path(hostName, "geneset", paste0(paste(organism, enrichDb, standardId, sep = "_"), ".gmt"))) + thisGeneSet <- readGmt(gmtPath) + thisGeneSet$database <- enrichDb # add a column for database source + geneSet <- rbind(geneSet, thisGeneSet) + } else { + gmtUrl <- modify_url(file.path(hostName, "api", "geneset"), query = list(organism = organism, database = enrichDb, standardId = standardId, fileType = "gmt")) + thisGeneSet <- readGmt(gmtUrl, cache = cache) + thisGeneSet$database <- enrichDb + oriCnt <- nrow(thisGeneSet) + thisGeneSet <- thisGeneSet %>% filter(!(.data$geneSet %in% unique(!!geneSet$geneSet))) + if (nrow(thisGeneSet) < oriCnt) { + warning(paste("Duplicate gene set names in", enrichDb, "have been ignored.")) + } + geneSet <- rbind(geneSet, thisGeneSet) + } + + ######### Read the description file############# + # geneSetDes <- rbind(geneSetDes, .loadGeneSetData(hostName, organism, enrichDb, standardId, "des", cache)) + thisGeneSetDes <- .loadGeneSetData(hostName, organism, enrichDb, standardId, "des", cache) + if (!is.null(thisGeneSetDes) && !is.null(geneSetDes)) { + thisGeneSetDes <- thisGeneSetDes %>% filter(!(.data$geneSet %in% unique(!!geneSetDes$geneSet))) + } + geneSetDes <- rbind(geneSetDes, thisGeneSetDes) + + ########### Try to load the DAG file################# + # assignment considering possible return of NULL + # list[] <- NULL will delete the element + geneSetDag[enrichDb] <- list(.loadGeneSetData(hostName, organism, enrichDb, standardId, "dag", cache)) + + ########### Try to load the network file if the gene sets are generated from the network########## + geneSetNet[enrichDb] <- list(.loadGeneSetData(hostName, organism, enrichDb, standardId, "net", cache)) + } + + # load local database files + for (i in seq_along(enrichDatabaseFile)) { + enrichDbFile <- enrichDatabaseFile[[i]] + if (is.null(enrichDbFile)) { + next + } + + thisGeneSet <- idMapping(organism = organism, dataType = "gmt", inputGeneFile = enrichDbFile, sourceIdType = enrichDatabaseType, targetIdType = NULL, mappingOutput = FALSE, cache = cache, hostName = hostName) + thisStandardId <- thisGeneSet$standardId # should be just enrichDatabaseType here + if (!is.null(standardId) && standardId != thisStandardId && !isMultiOmics) { + stop("Databases have inconsistent ID types. Mixed gene annotation databases with phosphosite databases?") + } + standardId <- thisStandardId + + thisGeneSet <- thisGeneSet$mapped %>% + select(.data$geneSet, .data$description, gene = standardId) %>% + distinct() + + # load local description files + enrichDbDesFile <- enrichDatabaseDescriptionFile[[i]] + if (!is.null(enrichDbDesFile)) { + thisGeneSetDes <- .loadEnrichDatabaseDescriptionFile(thisGeneSet, enrichDbDesFile) + if (!is.null(thisGeneSetDes) && !is.null(geneSetDes)) { + thisGeneSetDes <- thisGeneSetDes %>% filter(!(.data$geneSet %in% unique(!!geneSetDes$geneSet))) + } + geneSetDes <- rbind(geneSetDes, thisGeneSetDes) + } + + fileName <- gsub(".gmt", "", basename(enrichDbFile), fixed = TRUE) + thisGeneSet$database <- fileName + oriCnt <- nrow(thisGeneSet) + thisGeneSet <- thisGeneSet %>% filter(!(.data$geneSet %in% unique(!!geneSet$geneSet))) + if (nrow(thisGeneSet) < oriCnt) { + warning(paste("Duplicate gene set names in", fileName, "have been ignored.")) + } + geneSet <- rbind(geneSet, thisGeneSet) + + geneSetDag[fileName] <- list(NULL) # correct way to assign NULL to list element + geneSetNet[fileName] <- list(NULL) + } + } else { # custom organisms + for (i in seq_along(enrichDatabaseFile)) { + enrichDbFile <- enrichDatabaseFile[[i]] + if (is.null(enrichDbFile)) { + next + } + thisGeneSet <- readGmt(enrichDbFile) + + enrichDbDesFile <- enrichDatabaseDescriptionFile[[i]] + if (!is.null(enrichDbDesFile)) { + thisGeneSetDes <- .loadEnrichDatabaseDescriptionFile(thisGeneSet, enrichDbDesFile) + if (!is.null(thisGeneSetDes) && !is.null(geneSetDes)) { + thisGeneSetDes <- thisGeneSetDes %>% filter(!(.data$geneSet %in% unique(!!geneSetDes$geneSet))) + } + geneSetDes <- rbind(geneSetDes, thisGeneSetDes) + } + fileName <- gsub(".gmt", "", basename(enrichDbFile), fixed = TRUE) + thisGeneSet$database <- fileName + oriCnt <- nrow(thisGeneSet) + thisGeneSet <- thisGeneSet %>% filter(!(.data$geneSet %in% unique(!!geneSet$geneSet))) + if (nrow(thisGeneSet) < oriCnt) { + warning(paste("Duplicate gene set names in", enrichDb, "have been ignored.")) + } + geneSet <- rbind(geneSet, thisGeneSet) + + geneSetDag[fileName] <- list(NULL) + geneSetNet[fileName] <- list(NULL) + } + } + if (is.null(geneSet)) { + stop(enrichDatabaseError(type = "empty")) + } + # unlist if just one database, for backward compatibility + if (length(geneSetDag) == 1) { + geneSetDag <- geneSetDag[[1]] + } + if (length(geneSetNet) == 1) { + geneSetNet <- geneSetNet[[1]] + } + if (length(unique(geneSet$database)) == 1) { + # remove database column for single source + geneSet$database <- NULL + } + re <- list(geneSet = geneSet, geneSetDes = geneSetDes, geneSetDag = geneSetDag, geneSetNet = geneSetNet, standardId = standardId) + return(re) } #' @importFrom httr content #' @importFrom readr read_tsv -.loadGeneSetData <- function(hostName, organism, database, standardId, fileType, cache=NULL) { - # read gene set files from API or returns NULL - if (startsWith(hostName, "file://")) { - geneSetPath <- removeFileProtocol(file.path(hostName, "geneset", paste(paste(organism, database, standardId, sep="_"), fileType, sep="."))) - if (file.exists(geneSetPath)) { - geneSetData <- read_tsv(geneSetPath, col_names=FALSE, col_types="cc", quote="") - } else { - geneSetData <- NULL - } - } else { - geneSetUrl <- file.path(hostName,"api","geneset") - response <- cacheUrl(geneSetUrl, cache=cache, query=list(organism=organism, database=database, standardId=standardId, fileType=fileType)) - if (response$status_code == 200) { - geneSetData <- read_tsv(content(response), col_names=FALSE, col_types="cc", quote="") - } else { - geneSetData <- NULL - } - } - if (!is.null(geneSetData) && fileType == "des") { - colnames(geneSetData) <- c("geneSet", "description") - geneSetData <- geneSetData %>% distinct(.data$geneSet, .keep_all=TRUE) - } - return(geneSetData) +.loadGeneSetData <- function(hostName, organism, database, standardId, fileType, cache = NULL) { + # read gene set files from API or returns NULL + if (startsWith(hostName, "file://")) { + geneSetPath <- removeFileProtocol(file.path(hostName, "geneset", paste(paste(organism, database, standardId, sep = "_"), fileType, sep = "."))) + if (file.exists(geneSetPath)) { + geneSetData <- read_tsv(geneSetPath, col_names = FALSE, col_types = "cc", quote = "") + } else { + geneSetData <- NULL + } + } else { + geneSetUrl <- file.path(hostName, "api", "geneset") + response <- cacheUrl(geneSetUrl, cache = cache, query = list(organism = organism, database = database, standardId = standardId, fileType = fileType)) + if (response$status_code == 200) { + geneSetData <- read_tsv(content(response), col_names = FALSE, col_types = "cc", quote = "") + } else { + geneSetData <- NULL + } + } + if (!is.null(geneSetData) && fileType == "des") { + colnames(geneSetData) <- c("geneSet", "description") + geneSetData <- geneSetData %>% distinct(.data$geneSet, .keep_all = TRUE) + } + return(geneSetData) } #' @importFrom readr read_tsv #' @importFrom tools file_ext .loadEnrichDatabaseDescriptionFile <- function(geneSet, enrichDatabaseDescriptionFile) { - if (file_ext(enrichDatabaseDescriptionFile) != "des") { - warning(descriptionFileError("format")) - return(NULL) - } else { - geneSetDes <- read_tsv(enrichDatabaseDescriptionFile, col_names=c("geneSet", "description"), col_types="cc", quote="") %>% - distinct(geneSet, .keep_all=TRUE) - if (ncol(geneSetDes)!=2) { - warning(descriptionFileError("columnNum")) - return(NULL) - } else { - if (length(intersect(unique(geneSet$geneSet), geneSetDes$geneSet)) < 0.6 * length(unique(geneSet[,1]))) { - warning(descriptionFileError("overlap")) - return(NULL) - } else { - return(geneSetDes) - } - } - } + if (file_ext(enrichDatabaseDescriptionFile) != "des") { + warning(descriptionFileError("format")) + return(NULL) + } else { + geneSetDes <- read_tsv(enrichDatabaseDescriptionFile, col_names = c("geneSet", "description"), col_types = "cc", quote = "") %>% + distinct(geneSet, .keep_all = TRUE) + if (ncol(geneSetDes) != 2) { + warning(descriptionFileError("columnNum")) + return(NULL) + } else { + if (length(intersect(unique(geneSet$geneSet), geneSetDes$geneSet)) < 0.6 * length(unique(geneSet[, 1]))) { + warning(descriptionFileError("overlap")) + return(NULL) + } else { + return(geneSetDes) + } + } + } } diff --git a/R/multiOraEnrichment.R b/R/multiOraEnrichment.R index 729df0c..3506998 100644 --- a/R/multiOraEnrichment.R +++ b/R/multiOraEnrichment.R @@ -3,6 +3,8 @@ multiOraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10) { + # INFO: Code is almost identical to oraEnrichment.R, but modified to work with lists. + # Additionally, have to get a special intG for the meta-analysis, since it is a merge of the other intG's. for (i in seq_along(referenceGene)) { referenceGene[[i]] <- intersect(referenceGene[[i]], geneSet[[i]]$gene) geneSet[[i]] <- filter(geneSet[[i]], .data$gene %in% referenceGene[[i]]) @@ -71,8 +73,8 @@ multiOraEnrichment <- function(interestGene, referenceGene, geneSet, minNum = 10 for (i in seq_along(rust_result_df)) { if (i == 1) { # Meta-analysis enrichedResult <- rust_result_df[[i]] %>% - left_join(met_intG, by = "geneSet") %>% # get overlapping gene IDs - arrange(.data$FDR, .data$pValue, .data$enrichmentRatio) + left_join(met_intG, by = "geneSet") %>% # get overlapping gene IDs + arrange(.data$FDR, .data$pValue, .data$enrichmentRatio) enrichedResult$overlap <- sapply(enrichedResult$overlapId, function(x) { length(unlist(strsplit(x, ";"))) }) diff --git a/man/WebGestaltRMultiOmicsOra.Rd b/man/WebGestaltRMultiOmicsOra.Rd new file mode 100644 index 0000000..23c42f9 --- /dev/null +++ b/man/WebGestaltRMultiOmicsOra.Rd @@ -0,0 +1,167 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/WebGestaltRMultiomicsOra.R +\name{WebGestaltRMultiOmicsOra} +\alias{WebGestaltRMultiOmicsOra} +\title{Multi-omics ORA} +\usage{ +WebGestaltRMultiOmicsOra( + analyteLists = NULL, + analyteListFiles = NULL, + analyteTypes = NULL, + enrichMethod = "ORA", + organism = "hsapiens", + enrichDatabase = NULL, + enrichDatabaseFile = NULL, + enrichDatabaseType = NULL, + enrichDatabaseDescriptionFile = NULL, + collapseMethod = "mean", + minNum = 10, + maxNum = 500, + fdrMethod = "BH", + sigMethod = "fdr", + fdrThr = 0.05, + topThr = 10, + reportNum = 100, + setCoverNum = 10, + perNum = 1000, + gseaP = 1, + isOutput = TRUE, + outputDirectory = getwd(), + projectName = NULL, + dagColor = "binary", + nThreads = 1, + cache = NULL, + hostName = "https://www.webgestalt.org/", + useWeightedSetCover = TRUE, + useAffinityPropagation = FALSE, + usekMedoid = FALSE, + kMedoid_k = 25, + referenceLists = NULL, + referenceListFiles = NULL, + referenceTypes = NULL +) +} +\arguments{ +\item{analyteLists}{\code{vector} of the ID type of the corresponding interesting analyte list. The supported ID types of +WebGestaltR for the selected organism can be found by the function \code{listIdType}. If +the \code{organism} is \code{others}, users do not need to set this parameter. The length of \code{analyteLists} should be +the same as the length of \code{analyteListFiles} or \code{analyteLists}.} + +\item{analyteListFiles}{If \code{enrichMethod} is \code{ORA}, the extension of +the \code{analyteListFiles} should be \code{txt} and each file can only contain one column: +the interesting analyte list. If \code{enrichMethod} is \code{GSEA}, the extension of the +\code{analyteListFiles} should be \code{rnk} and the files should contain two columns +separated by tab: the analyte list and the corresponding scores.} + +\item{analyteTypes}{a vector containing the ID types of the analyte lists.} + +\item{enrichMethod}{Enrichment methods: \code{ORA}or \code{GSEA}.} + +\item{organism}{Currently, WebGestaltR supports 12 organisms. Users can use the function +\code{listOrganism} to check available organisms. Users can also input \code{others} to +perform the enrichment analysis for other organisms not supported by WebGestaltR. For +other organisms, users need to provide the functional categories, interesting list and +reference list (for ORA method). Because WebGestaltR does not perform the ID mapping for +the other organisms, the above data should have the same ID type.} + +\item{enrichDatabase}{The functional categories for the enrichment analysis. Users can use +the function \code{listGeneSet} to check the available functional databases for the +selected organism. Multiple databases in a vector are supported for ORA and GSEA.} + +\item{enrichDatabaseFile}{Users can provide one or more GMT files as the functional +category for enrichment analysis. The extension of the file should be \code{gmt} and the +first column of the file is the category ID, the second one is the external link for the +category. Genes annotated to the category are from the third column. All columns are +separated by tabs. The GMT files will be combined with \code{enrichDatabase}.} + +\item{enrichDatabaseType}{The ID type of the genes in the \code{enrichDatabaseFile}. +If users set \code{organism} as \code{others}, users do not need to set this ID type because +WebGestaltR will not perform ID mapping for other organisms. The supported ID types of +WebGestaltR for the selected organism can be found by the function \code{listIdType}.} + +\item{enrichDatabaseDescriptionFile}{Users can also provide description files for the custom +\code{enrichDatabaseFile}. The extension of the description file should be \code{des}. The +description file contains two columns: the first column is the category ID that should be +exactly the same as the category ID in the custom \code{enrichDatabaseFile} and the second +column is the description of the category. All columns are separated by tabs.} + +\item{collapseMethod}{The method to collapse duplicate IDs with scores. \code{mean}, +\code{median}, \code{min} and \code{max} represent the mean, median, minimum and maximum +of scores for the duplicate IDs.} + +\item{minNum}{WebGestaltR will exclude the categories with the number of annotated genes +less than \code{minNum} for enrichment analysis. The default is \code{10}.} + +\item{maxNum}{WebGestaltR will exclude the categories with the number of annotated genes +larger than \code{maxNum} for enrichment analysis. The default is \code{500}.} + +\item{fdrMethod}{For the ORA method, WebGestaltR supports five FDR methods: \code{holm}, +\code{hochberg}, \code{hommel}, \code{bonferroni}, \code{BH} and \code{BY}. The default +is \code{BH}.} + +\item{sigMethod}{Two methods of significance are available in WebGestaltR: \code{fdr} and +\code{top}. \code{fdr} means the enriched categories are identified based on the FDR and +\code{top} means all categories are ranked based on FDR and then select top categories +as the enriched categories. The default is \code{fdr}.} + +\item{fdrThr}{The significant threshold for the \code{fdr} method. The default is \code{0.05}.} + +\item{topThr}{The threshold for the \code{top} method. The default is \code{10}.} + +\item{reportNum}{The number of enriched categories visualized in the final report. The default +is \code{20}. A larger \code{reportNum} may be slow to render in the report.} + +\item{setCoverNum}{The number of expected gene sets after set cover to reduce redundancy. +It could get fewer sets if the coverage reaches 100\%. The default is \code{10}.} + +\item{perNum}{The number of permutations for the GSEA method. The default is \code{1000}.} + +\item{gseaP}{The exponential scaling factor of the phenotype score. The default is \code{1}. +When p=0, ES reduces to standard K-S statistics (See original paper for more details).} + +\item{isOutput}{If \code{isOutput} is TRUE, WebGestaltR will create a folder named by +the \code{projectName} and save the results in the folder. Otherwise, WebGestaltR will +only return an R \code{data.frame} object containing the enrichment results. If +hundreds of gene list need to be analyzed simultaneously, it is better to set +\code{isOutput} to \code{FALSE}. The default is \code{TRUE}.} + +\item{outputDirectory}{The output directory for the results.} + +\item{projectName}{The name of the project. If \code{projectName} is \code{NULL}, +WebGestaltR will use time stamp as the project name.} + +\item{dagColor}{If \code{dagColor} is \code{binary}, the significant terms in the DAG +structure will be colored by steel blue for ORA method or steel blue (positive related) +and dark orange (negative related) for GSEA method. If \code{dagColor} is \code{continous}, +the significant terms in the DAG structure will be colored by the color gradient based on +corresponding FDRs.} + +\item{nThreads}{The number of cores to use for GSEA and set cover, and in batch function.} + +\item{cache}{A directory to save data cache for reuse. Defaults to \code{NULL} and disabled.} + +\item{hostName}{The server URL for accessing data. Mostly for development purposes.} + +\item{useWeightedSetCover}{Use weighted set cover for ORA. Defaults to \code{TRUE}.} + +\item{useAffinityPropagation}{Use affinity propagation for ORA. Defaults to \code{FALSE}.} + +\item{usekMedoid}{Use k-medoid for ORA. Defaults to \code{TRUE}.} + +\item{kMedoid_k}{The number of clusters for k-medoid. Defaults to \code{25}.} + +\item{referenceLists}{For the ORA method, users can also use an R object as the reference +gene list. \code{referenceLists} should be an R \code{vector} object containing the +reference gene list.} + +\item{referenceListFiles}{For the ORA method, the users need to upload the reference gene +list. The extension of the \code{referenceListFile} should be \code{txt} and the file can +only contain one column: the reference gene list.} + +\item{referenceTypes}{Vector of the ID types of the reference lists. The supported ID types +of WebGestaltR for the selected organism can be found by the function \code{listIdType}. +If the \code{organism} is \code{others}, users do not need to set this parameter.} +} +\description{ +Multi-omics ORA +} From 7769caf6cea9b2d7b941aac58df7c83e5efdfb48 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Thu, 30 Nov 2023 23:48:50 -0600 Subject: [PATCH 78/82] add start of metareport function --- R/WebGestaltRMultiOmics.R | 13 ++- R/WebGestaltRMultiomicsOra.R | 10 +-- R/createMetaReport.R | 135 ++++++++++++++++++++++++++++++++ man/WebGestaltRMultiOmics.Rd | 5 +- man/WebGestaltRMultiOmicsOra.Rd | 5 +- 5 files changed, 159 insertions(+), 9 deletions(-) create mode 100644 R/createMetaReport.R diff --git a/R/WebGestaltRMultiOmics.R b/R/WebGestaltRMultiOmics.R index 3a1a262..f1a12fe 100644 --- a/R/WebGestaltRMultiOmics.R +++ b/R/WebGestaltRMultiOmics.R @@ -103,6 +103,7 @@ #' @param mergeMethod The method to merge the results from multiple omics (options: \code{mean}, \code{max}). Only used if \code{isMetaAnalysis = FALSE}. Defaults to \code{mean}. #' @param normalizationMethod The method to normalize the results from multiple omics (options: \code{rank}, \code{median}, \code{mean}). Only used if \code{isMetaAnalysis = FALSE}. #' @param kMedoid_k The number of clusters for k-medoid. Defaults to \code{25}. +#' @param listNames The names of the analyte lists. #' @export WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, analyteTypes = NULL, enrichMethod = "ORA", organism = "hsapiens", enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, enrichDatabaseDescriptionFile = NULL, @@ -111,7 +112,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, projectName = NULL, dagColor = "binary", saveRawGseaResult = FALSE, gseaPlotFormat = "png", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 25, isMetaAnalysis = TRUE, mergeMethod = "mean", normalizationMethod = "rank", - referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL) { + referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL, listNames = NULL) { VALID_MERGE_METHODS <- c("mean", "max") VALID_NORM_METHODS <- c("rank", "median", "mean") VALID_ENRICH_METHODS <- c("ORA", "GSEA") @@ -167,6 +168,14 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, if (length(analyteTypes) == 1) { stop("Performing multiomics analysis requires multiple analyte types. If you only have one analyte type, use the WebGestaltR(...) function instead.") } + if (is.null(listNames)) { + max_num <- max(length(analyteLists), length(analyteListFiles)) + listNames <- paste0("List", 1:max_num, collapse = "") + } else if (length(listNames) != length(analyteTypes)) { + warning("listNames must be the same length as analyteTypes. Defaulting to List1, List2, etc.") + max_num <- max(length(analyteLists), length(analyteListFiles)) + listNames <- paste0("List", 1:max_num) + } if (enrichMethod == "ORA") { WebGestaltRMultiOmicsOra(analyteLists = analyteLists, analyteListFiles = analyteListFiles, analyteTypes = analyteTypes, organism = organism, @@ -176,7 +185,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, setCoverNum = setCoverNum, perNum = perNum, isOutput = isOutput, outputDirectory = outputDirectory, projectName = projectName, dagColor = dagColor, nThreads = nThreads, cache = cache, hostName = hostName, useWeightedSetCover = useWeightedSetCover, useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid, kMedoid_k = kMedoid_k, referenceLists = referenceLists, - referenceListFiles = referenceListFiles, referenceTypes = referenceTypes) + referenceListFiles = referenceListFiles, referenceTypes = referenceTypes, listNames = listNames) ## Meta-analysis } else if (enrichMethod == "GSEA") { if (isMetaAnalysis) { diff --git a/R/WebGestaltRMultiomicsOra.R b/R/WebGestaltRMultiomicsOra.R index 22fda21..bdd725a 100644 --- a/R/WebGestaltRMultiomicsOra.R +++ b/R/WebGestaltRMultiomicsOra.R @@ -6,7 +6,7 @@ WebGestaltRMultiOmicsOra <- function(analyteLists = NULL, analyteListFiles = NUL topThr = 10, reportNum = 100, setCoverNum = 10, perNum = 1000, gseaP = 1, isOutput = TRUE, outputDirectory = getwd(), projectName = NULL, dagColor = "binary", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 25, - referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL) { + referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL, listNames = null) { projectDir <- file.path(outputDirectory, paste0("Project_", projectName)) cat("Performing multi-omics ORA\nLoading the functional categories...\n") all_sets <- .load_meta_gmt(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) @@ -130,7 +130,7 @@ WebGestaltRMultiOmicsOra <- function(analyteLists = NULL, analyteListFiles = NUL } if (isOutput) { - write_tsv(outputEnrichedSig, file.path(projectDir, paste0("enrichment_results_", projectName, "_list", i - 1, ".txt"))) + write_tsv(outputEnrichedSig, file.path(projectDir, paste0("enrichment_results_", projectName, "_", listNames[i - 1], ".txt"))) idsInSet <- sapply(enrichedSig$overlapId, strsplit, split = ";") names(idsInSet) <- enrichedSig$geneSet minusLogP <- -log(enrichedSig$pValue) @@ -148,19 +148,19 @@ WebGestaltRMultiOmicsOra <- function(analyteLists = NULL, analyteListFiles = NUL kRes <- kMedoid(idsInSet, minusLogP, maxK = kMedoid_k) } if (!is.null(apRes)) { - writeLines(sapply(apRes$clusters, paste, collapse = "\t"), file.path(projectDir, paste0("enriched_geneset_ap_clusters_", projectName, "_list", i - 1, ".txt"))) + writeLines(sapply(apRes$clusters, paste, collapse = "\t"), file.path(projectDir, paste0("enriched_geneset_ap_clusters_", projectName, "_", listNames[i - 1], ".txt"))) } else { apRes <- NULL } clusters$ap <- apRes if (!is.null(kRes)) { - writeLines(sapply(kRes$clusters, paste, collapse = "\t"), file.path(projectDir, paste0("enriched_geneset_kmedoid_clusters_", projectName, "_list", i - 1, ".txt"))) + writeLines(sapply(kRes$clusters, paste, collapse = "\t"), file.path(projectDir, paste0("enriched_geneset_kmedoid_clusters_", projectName, "_", listNames[i - 1], ".txt"))) } else { kRes <- NULL } clusters$km <- kRes if (!is.null(wscRes$topSets)) { - writeLines(c(paste0("# Coverage: ", wscRes$coverage), wscRes$topSets), file.path(projectDir, paste0("enriched_geneset_wsc_topsets_", projectName, "_list", i - 1, ".txt"))) + writeLines(c(paste0("# Coverage: ", wscRes$coverage), wscRes$topSets), file.path(projectDir, paste0("enriched_geneset_wsc_topsets_", projectName, "_", listNames[i - 1], ".txt"))) clusters$wsc <- list(representatives = wscRes$topSets, coverage = wscRes$coverage) } else { clusters$wsc <- NULL diff --git a/R/createMetaReport.R b/R/createMetaReport.R new file mode 100644 index 0000000..670d85d --- /dev/null +++ b/R/createMetaReport.R @@ -0,0 +1,135 @@ +#' createMetaReport +#' +#' Generate HTML report for ORA and GSEA MetaAnalysis +#' +#' @importFrom jsonlite toJSON +#' @importFrom whisker whisker.render +#' +#' @keywords internal +#' +createMetaReport <- function(hostName, outputDirectory, organism = "hsapiens", projectName, enrichMethod, geneSet, geneSetDes, + geneSetDag, geneSetNet, interestingGeneMap, referenceGeneList, enrichedSig, geneTables, clusters, + background, enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, + enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, + collapseMethod = "mean", referenceGeneFile = NULL, referenceGene = NULL, referenceGeneType = NULL, + referenceSet = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10, + reportNum = 20, perNum = 1000, p = 1, dagColor = "binary") { + outputHtmlFile <- file.path(outputDirectory, paste0("Project_", projectName), paste0("Report_", projectName, ".html")) + + # if hostname starts with "file://", it is used as WebGestaltReporter + if (startsWith(hostName, "file://")) { + # change back hostName for web assets and browsers will cache it. + hostName <- "https://www.webgestalt.org" + } + + numAnnoRefUserId <- NULL + dagJson <- list() + allEnrichedSig <- enrichedSig + repAdded <- FALSE + if (organism!="others") { + if (!is.null(enrichedSig) && reportNum < nrow(enrichedSig)) { + if (enrichMethod == "ORA") { + enrichedSig <- enrichedSig[1:reportNum, ] + } else if (enrichMethod == "GSEA") { + enrichedSig <- getTopGseaResults(enrichedSig, reportNum / 2)[[1]] + } + # Add representatives if they are not in top ReportNum. So could be more if ReportNum.is small and high redundancy in top + numRes <- nrow(enrichedSig) + enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$ap$representatives) + enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$wsc$representatives) + repAdded <- nrow(enrichedSig) > numRes + } + standardId <- interestingGeneMap$standardId + if (enrichMethod == 'ORA') { + interestGeneList <- unique(interestingGeneMap$mapped[[standardId]]) + numAnnoRefUserId <- length(intersect(interestGeneList, + intersect(referenceGeneList, geneSet$gene))) + } + + ##### Summary Tab ######## + bodyContent <- summaryDescription(projectName, organism, interestGeneFile, interestGene, interestGeneType, enrichMethod, enrichDatabase, enrichDatabaseFile, enrichDatabaseType, enrichDatabaseDescriptionFile, interestingGeneMap, referenceGeneList, referenceGeneFile, referenceGene, referenceGeneType, referenceSet, minNum, maxNum, sigMethod, fdrThr, topThr, fdrMethod, allEnrichedSig, reportNum, perNum, p, geneSet, repAdded, numAnnoRefUserId, hostName) + + ########### GOSlim summary ######################### + if(standardId=="entrezgene"){ + bodyContent <- paste(bodyContent, goSlimReport(projectName), sep='\n') + } + + ############ Enrichment result ################## + if (!is.null(enrichedSig)) { + bodyContent <- paste(bodyContent, enrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters), seq='\n') + if (!is.null(geneSetDag)) { + if (!is.vector(geneSetDag)) { + # for backward compatibility, it is unlisted for single dataset + geneSetDag <- list(geneSetDag) + names(geneSetDag) <- ifelse(is.character(enrichDatabase), enrichDatabase, gsub(".gmt", "", basename(enrichDatabaseFile), fixed=TRUE)) + } + for (name in names(geneSetDag)) { + dag <- geneSetDag[[name]] + if (is.null(dag)) { + #dagJson[[name]] <- list(NULL) + next + } + dagRes <- expandDag(enrichedSig$geneSet, dag) + dagEdges <- dagRes$edges + dagNodes <- getDagNodes(enrichedSig, dagRes$allNodes, geneSetDes, enrichMethod, dagColor) + dagJson[[name]] <- c(dagEdges, dagNodes) + } + } + } + } else { + ########### Organism is others. No mapping information ############# + ############# Summary for the analysis ################### + if (enrichMethod == "ORA") { + numAnnoRefUserId <- length(intersect(interestingGeneMap, + intersect(referenceGeneList, geneSet$gene))) + } + if (!is.null(enrichedSig) && reportNum < nrow(enrichedSig)) { + if (enrichMethod == "ORA") { + enrichedSig <- enrichedSig[1:reportNum, ] + } else if (enrichMethod == "GSEA") { + enrichedSig <- getTopGseaResults(enrichedSig, reportNum / 2)[[1]] + } + # Add representatives if they are not in top ReportNum. So could be more if ReportNum.is small and high redundancy in top + numRes <- nrow(enrichedSig) + enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$ap$representatives) + enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$wsc$representatives) + repAdded <- nrow(enrichedSig) > numRes + } + + bodyContent <- summaryDescription(projectName, organism, interestGeneFile, interestGene, interestGeneType, enrichMethod, enrichDatabase, enrichDatabaseFile, enrichDatabaseType, enrichDatabaseDescriptionFile, interestingGeneMap, referenceGeneList, referenceGeneFile, referenceGene, referenceGeneType, referenceSet, minNum, maxNum, sigMethod, fdrThr, topThr, fdrMethod, allEnrichedSig, reportNum, perNum, p, geneSet, repAdded, numAnnoRefUserId, hostName) + + ############## Enrich Result ################ + if (!is.null(enrichedSig)) { + bodyContent <- paste(bodyContent, enrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters), seq='\n') + } + standardId <- NULL + } + if (is.null(enrichedSig)) { + enrichedSig <- data.frame() + } + if (is.null(background)) { + background <- data.frame() + } + version <- packageVersion("WebGestaltR") + # use major and minor version numbers for JS lib. If API changes, version should be bumped + # patch number should not matter + version <- paste(version[1, 1], version[1, 2], sep = ".") + hasGeneSetDag = !is.null(geneSetDag) + hasCytoscape <- hasGeneSetDag || !is.null(geneSetNet) # DAG or network needs cytoscape + allDbNames <- unlist(c(enrichDatabase, unname(sapply(enrichDatabaseFile, function(x) { + gsub(".gmt", "", basename(x), fixed=TRUE) + })))) # sapply on NULL will return a list + + header <- readLines(system.file("templates/header.mustache", package="WebGestaltR")) + footer <- readLines(system.file("templates/footer.mustache", package="WebGestaltR")) + template <- readLines(system.file("templates/template.mustache", package="WebGestaltR")) + data <- list(hostName=hostName, bodyContent=bodyContent, + organism=organism, enrichDatabaseJson=toJSON(allDbNames, auto_unbox=TRUE), + sigJson=toJSON(enrichedSig, digits=16), insigJson=toJSON(background, digits=16), + dagJson=toJSON(dagJson, auto_unbox=TRUE), hasGeneSetDag=hasGeneSetDag, version=version, + clusterJson=toJSON(clusters), hasCytoscape=hasCytoscape, + geneTableJson=toJSON(geneTables), standardId=standardId, numAnnoRefUserId=numAnnoRefUserId, + methodIsGsea=enrichMethod=="GSEA", hasGeneSetDes=!is.null(geneSetDes) + ) + cat(whisker.render(template, data, partials=list(header=header, footer=footer)), file=outputHtmlFile) +} diff --git a/man/WebGestaltRMultiOmics.Rd b/man/WebGestaltRMultiOmics.Rd index 8fd7c7e..c33c54a 100644 --- a/man/WebGestaltRMultiOmics.Rd +++ b/man/WebGestaltRMultiOmics.Rd @@ -43,7 +43,8 @@ WebGestaltRMultiOmics( normalizationMethod = "rank", referenceLists = NULL, referenceListFiles = NULL, - referenceTypes = NULL + referenceTypes = NULL, + listNames = NULL ) } \arguments{ @@ -183,6 +184,8 @@ only contain one column: the reference gene list.} \item{referenceTypes}{Vector of the ID types of the reference lists. The supported ID types of WebGestaltR for the selected organism can be found by the function \code{listIdType}. If the \code{organism} is \code{others}, users do not need to set this parameter.} + +\item{listNames}{The names of the analyte lists.} } \description{ Perform multi-omics analysis using WebGestaltR diff --git a/man/WebGestaltRMultiOmicsOra.Rd b/man/WebGestaltRMultiOmicsOra.Rd index 23c42f9..41f9347 100644 --- a/man/WebGestaltRMultiOmicsOra.Rd +++ b/man/WebGestaltRMultiOmicsOra.Rd @@ -38,7 +38,8 @@ WebGestaltRMultiOmicsOra( kMedoid_k = 25, referenceLists = NULL, referenceListFiles = NULL, - referenceTypes = NULL + referenceTypes = NULL, + listNames = null ) } \arguments{ @@ -161,6 +162,8 @@ only contain one column: the reference gene list.} \item{referenceTypes}{Vector of the ID types of the reference lists. The supported ID types of WebGestaltR for the selected organism can be found by the function \code{listIdType}. If the \code{organism} is \code{others}, users do not need to set this parameter.} + +\item{listNames}{The names of the analyte lists.} } \description{ Multi-omics ORA From 6e787760375cdbf27270012eea680eee48fa819c Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Fri, 1 Dec 2023 10:09:06 -0600 Subject: [PATCH 79/82] fix list names --- R/WebGestaltRMultiOmics.R | 8 ++++--- man/createMetaReport.Rd | 50 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 man/createMetaReport.Rd diff --git a/R/WebGestaltRMultiOmics.R b/R/WebGestaltRMultiOmics.R index f1a12fe..036ca78 100644 --- a/R/WebGestaltRMultiOmics.R +++ b/R/WebGestaltRMultiOmics.R @@ -169,14 +169,16 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL, stop("Performing multiomics analysis requires multiple analyte types. If you only have one analyte type, use the WebGestaltR(...) function instead.") } if (is.null(listNames)) { - max_num <- max(length(analyteLists), length(analyteListFiles)) - listNames <- paste0("List", 1:max_num, collapse = "") + max_num <- length(analyteTypes) + listNames <- paste0("List", 1:max_num) } else if (length(listNames) != length(analyteTypes)) { warning("listNames must be the same length as analyteTypes. Defaulting to List1, List2, etc.") - max_num <- max(length(analyteLists), length(analyteListFiles)) + max_num <- length(analyteTypes) listNames <- paste0("List", 1:max_num) } + listNames <- sapply(listNames, sanitizeFileName) + if (enrichMethod == "ORA") { WebGestaltRMultiOmicsOra(analyteLists = analyteLists, analyteListFiles = analyteListFiles, analyteTypes = analyteTypes, organism = organism, enrichDatabase = enrichDatabase, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, diff --git a/man/createMetaReport.Rd b/man/createMetaReport.Rd new file mode 100644 index 0000000..c07fcb5 --- /dev/null +++ b/man/createMetaReport.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/createMetaReport.R +\name{createMetaReport} +\alias{createMetaReport} +\title{createMetaReport} +\usage{ +createMetaReport( + hostName, + outputDirectory, + organism = "hsapiens", + projectName, + enrichMethod, + geneSet, + geneSetDes, + geneSetDag, + geneSetNet, + interestingGeneMap, + referenceGeneList, + enrichedSig, + geneTables, + clusters, + background, + enrichDatabase = NULL, + enrichDatabaseFile = NULL, + enrichDatabaseType = NULL, + enrichDatabaseDescriptionFile = NULL, + interestGeneFile = NULL, + interestGene = NULL, + interestGeneType = NULL, + collapseMethod = "mean", + referenceGeneFile = NULL, + referenceGene = NULL, + referenceGeneType = NULL, + referenceSet = NULL, + minNum = 10, + maxNum = 500, + fdrMethod = "BH", + sigMethod = "fdr", + fdrThr = 0.05, + topThr = 10, + reportNum = 20, + perNum = 1000, + p = 1, + dagColor = "binary" +) +} +\description{ +Generate HTML report for ORA and GSEA MetaAnalysis +} +\keyword{internal} From c84f1be566486145c67cbd1ad3ec87ff27ccd1d7 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 4 Dec 2023 10:23:54 -0600 Subject: [PATCH 80/82] get start of working template --- R/WebGestaltRMultiomicsOra.R | 37 +++- R/createMetaReport.R | 303 ++++++++++++++++---------- inst/templates/meta_template.mustache | 122 +++++++++++ man/createMetaReport.Rd | 45 ++-- 4 files changed, 367 insertions(+), 140 deletions(-) create mode 100644 inst/templates/meta_template.mustache diff --git a/R/WebGestaltRMultiomicsOra.R b/R/WebGestaltRMultiomicsOra.R index bdd725a..7b59da1 100644 --- a/R/WebGestaltRMultiomicsOra.R +++ b/R/WebGestaltRMultiomicsOra.R @@ -12,7 +12,7 @@ WebGestaltRMultiOmicsOra <- function(analyteLists = NULL, analyteListFiles = NUL all_sets <- .load_meta_gmt(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName) cat("Loading the ID lists...\n") interest_lists <- list() - interestGeneMaps <- list() + interestGeneMaps <- list() if (is.null(analyteLists)) { for (i in seq_along(analyteListFiles)) { interestingGeneMap <- loadInterestGene( @@ -86,6 +86,11 @@ WebGestaltRMultiOmicsOra <- function(analyteLists = NULL, analyteListFiles = NUL return(NULL) } cat("Generating the report...\n") + geneSetDags <- all_sets[["geneSetDag"]] + geneSetNets <- all_sets[["geneSetNet"]] + enrichSigs <- list() + enrichSigs[[1]] <- NULL + clusters_list <- list() dir.create(projectDir, showWarnings = FALSE) for (i in 2:length(oraRes$enriched)) { interestingGeneMap <- interestGeneMaps[[i - 1]] @@ -165,7 +170,37 @@ WebGestaltRMultiOmicsOra <- function(analyteLists = NULL, analyteListFiles = NUL } else { clusters$wsc <- NULL } + clusters_list[[i]] <- clusters } } + enrichSigs[[i]] <- enrichedSig } + if (isOutput) { + ############## Create report ################## + cat("Generate the final report...\n") + createMetaReport( + hostName = hostName, outputDirectory = outputDirectory, organism = organism, + projectName = projectName, enrichMethod = enrichMethod, geneSet_list = all_sets[["geneSet"]], + geneSetDes_list = geneSetDes, geneSetDag_list = geneSetDags, geneSetNet_list = geneSetNets, + interestingGeneMap_list = interestGeneMaps, referenceGeneList_list = reference_lists, + enrichedSig_list = enrichSigs, background_list = insig, geneTables_list = geneTables, + clusters_list = clusters_list, enrichDatabase_list = enrichDatabase, + enrichDatabaseFile_list = enrichDatabaseFile, enrichDatabaseType_list = enrichDatabaseType, + enrichDatabaseDescriptionFile_list = enrichDatabaseDescriptionFile, + interestGeneFile_list = analyteListFiles, interestGene_list = interest_lists, + interestGeneType_list = analyteTypes, collapseMethod = collapseMethod, + referenceGeneFile_list = referenceListFiles, referenceGene_list = referenceLists, + referenceGeneType_list = referenceTypes, referenceSet_list = referenceLists, minNum = minNum, + maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, + topThr = topThr, reportNum = reportNum, dagColor = dagColor, listNames = listNames + ) + + cwd <- getwd() + setwd(projectDir) + zip(paste0("Project_", projectName, ".zip"), ".", flags = "-rq") + setwd(cwd) + + cat("Results can be found in the ", projectDir, "!\n", sep = "") + } + return(outputEnrichedSig) } diff --git a/R/createMetaReport.R b/R/createMetaReport.R index 670d85d..08d5948 100644 --- a/R/createMetaReport.R +++ b/R/createMetaReport.R @@ -7,129 +7,198 @@ #' #' @keywords internal #' -createMetaReport <- function(hostName, outputDirectory, organism = "hsapiens", projectName, enrichMethod, geneSet, geneSetDes, - geneSetDag, geneSetNet, interestingGeneMap, referenceGeneList, enrichedSig, geneTables, clusters, - background, enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, - enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, - collapseMethod = "mean", referenceGeneFile = NULL, referenceGene = NULL, referenceGeneType = NULL, - referenceSet = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10, - reportNum = 20, perNum = 1000, p = 1, dagColor = "binary") { - outputHtmlFile <- file.path(outputDirectory, paste0("Project_", projectName), paste0("Report_", projectName, ".html")) +createMetaReport <- function(hostName, outputDirectory, organism = "hsapiens", projectName, enrichMethod, geneSet_list, geneSetDes_list, + geneSetDag_list, geneSetNet_list, interestingGeneMap_list, referenceGeneList_list, enrichedSig_list, geneTables_list, clusters_list, + background_list, enrichDatabase_list = NULL, enrichDatabaseFile_list = NULL, enrichDatabaseType_list = NULL, + enrichDatabaseDescriptionFile_list = NULL, interestGeneFile_list = NULL, interestGene_list = NULL, + interestGeneType_list = NULL, collapseMethod = "mean", referenceGeneFile_list = NULL, referenceGene_list = NULL, + referenceGeneType_list = NULL, referenceSet_list = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, + topThr = 10, reportNum = 20, perNum = 1000, p = 1, dagColor = "binary", listNames = NULL) { + outputHtmlFile <- file.path(outputDirectory, paste0("Project_", projectName), paste0("Report_", projectName, ".html")) - # if hostname starts with "file://", it is used as WebGestaltReporter - if (startsWith(hostName, "file://")) { - # change back hostName for web assets and browsers will cache it. - hostName <- "https://www.webgestalt.org" - } + # if hostname starts with "file://", it is used as WebGestaltReporter + if (startsWith(hostName, "file://")) { + # change back hostName for web assets and browsers will cache it. + hostName <- "https://www.webgestalt.org" + } + tabs <- c() + for (i in seq_along(interestingGeneMap_list)) { + if (i == 0) { - numAnnoRefUserId <- NULL - dagJson <- list() - allEnrichedSig <- enrichedSig - repAdded <- FALSE - if (organism!="others") { - if (!is.null(enrichedSig) && reportNum < nrow(enrichedSig)) { - if (enrichMethod == "ORA") { - enrichedSig <- enrichedSig[1:reportNum, ] - } else if (enrichMethod == "GSEA") { - enrichedSig <- getTopGseaResults(enrichedSig, reportNum / 2)[[1]] - } - # Add representatives if they are not in top ReportNum. So could be more if ReportNum.is small and high redundancy in top - numRes <- nrow(enrichedSig) - enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$ap$representatives) - enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$wsc$representatives) - repAdded <- nrow(enrichedSig) > numRes - } - standardId <- interestingGeneMap$standardId - if (enrichMethod == 'ORA') { - interestGeneList <- unique(interestingGeneMap$mapped[[standardId]]) - numAnnoRefUserId <- length(intersect(interestGeneList, - intersect(referenceGeneList, geneSet$gene))) - } + } else { + print(paste("Processing dataset", i, "of", length(interestingGeneMap_list))) + enrichedSig <- enrichedSig_list[[i + 1]] + geneSet <- geneSet_list[[i]] + if (!is.null(geneSetDes_list) && length(geneSetDes_list) > 0) { + geneSetDes <- geneSetDes_list[[i]] + } else { + geneSetDes <- NULL + } + if (!is.null(geneSetDag_list) && length(geneSetDag_list) > 0) { + geneSetDag <- geneSetDag_list[[i]] + } else { + geneSetDag <- NULL + } + if (!is.null(geneSetNet_list) && length(geneSetNet_list) > 0) { + geneSetNet <- geneSetNet_list[[i]] + } else { + geneSetNet <- NULL + } + interestingGeneMap <- interestingGeneMap_list[[i]] + referenceGeneList <- referenceGeneList_list[[i]] + geneTables <- geneTables_list[[i]] + clusters <- clusters_list[[i]] + background <- background_list[[i]] + if (length(enrichDatabase_list) == 1) { + enrichDatabase <- enrichDatabase_list + } else { + enrichDatabase <- enrichDatabase_list[[i]] + } + if (length(enrichDatabaseFile_list) == 1) { + enrichDatabaseFile <- enrichDatabaseFile_list + } else { + enrichDatabaseFile <- enrichDatabaseFile_list[[i]] + } + if (length(enrichDatabaseType_list) == 1) { + enrichDatabaseType <- enrichDatabaseType_list + } else { + enrichDatabaseType <- enrichDatabaseType_list[[i]] + } + if (length(enrichDatabaseDescriptionFile_list) == 1) { + enrichDatabaseDescriptionFile <- enrichDatabaseDescriptionFile_list + } else { + enrichDatabaseDescriptionFile <- enrichDatabaseDescriptionFile_list[[i]] + } + interestGeneFile <- interestGeneFile_list[[i]] + interestGene <- interestGene_list[[i]] + interestGeneType <- interestGeneType_list[[i]] + referenceGeneFile <- referenceGeneFile_list[[i]] + referenceGene <- referenceGene_list[[i]] + referenceGeneType <- referenceGeneType_list[[i]] + referenceSet <- referenceSet_list[[i]] + numAnnoRefUserId <- NULL + dagJson <- list() + allEnrichedSig <- enrichedSig + repAdded <- FALSE + if (organism != "others") { + if (!is.null(enrichedSig) && reportNum < nrow(enrichedSig)) { + if (enrichMethod == "ORA") { + enrichedSig <- enrichedSig[1:reportNum, ] + } else if (enrichMethod == "GSEA") { + enrichedSig <- getTopGseaResults(enrichedSig, reportNum / 2)[[1]] + } + # Add representatives if they are not in top ReportNum. So could be more if ReportNum.is small and high redundancy in top + numRes <- nrow(enrichedSig) + enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$ap$representatives) + enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$wsc$representatives) + repAdded <- nrow(enrichedSig) > numRes + } + standardId <- interestingGeneMap$standardId + if (enrichMethod == "ORA") { + interestGeneList <- unique(interestingGeneMap$mapped[[standardId]]) + numAnnoRefUserId <- length(intersect( + interestGeneList, + intersect(referenceGeneList, geneSet$gene) + )) + } - ##### Summary Tab ######## - bodyContent <- summaryDescription(projectName, organism, interestGeneFile, interestGene, interestGeneType, enrichMethod, enrichDatabase, enrichDatabaseFile, enrichDatabaseType, enrichDatabaseDescriptionFile, interestingGeneMap, referenceGeneList, referenceGeneFile, referenceGene, referenceGeneType, referenceSet, minNum, maxNum, sigMethod, fdrThr, topThr, fdrMethod, allEnrichedSig, reportNum, perNum, p, geneSet, repAdded, numAnnoRefUserId, hostName) + ##### Summary Tab ######## + bodyContent <- summaryDescription(projectName, organism, interestGeneFile, interestGene, interestGeneType, enrichMethod, enrichDatabase, enrichDatabaseFile, enrichDatabaseType, enrichDatabaseDescriptionFile, interestingGeneMap, referenceGeneList, referenceGeneFile, referenceGene, referenceGeneType, referenceSet, minNum, maxNum, sigMethod, fdrThr, topThr, fdrMethod, allEnrichedSig, reportNum, perNum, p, geneSet, repAdded, numAnnoRefUserId, hostName) - ########### GOSlim summary ######################### - if(standardId=="entrezgene"){ - bodyContent <- paste(bodyContent, goSlimReport(projectName), sep='\n') - } + ########### GOSlim summary ######################### + if (standardId == "entrezgene") { + bodyContent <- paste(bodyContent, goSlimReport(projectName), sep = "\n") + } - ############ Enrichment result ################## - if (!is.null(enrichedSig)) { - bodyContent <- paste(bodyContent, enrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters), seq='\n') - if (!is.null(geneSetDag)) { - if (!is.vector(geneSetDag)) { - # for backward compatibility, it is unlisted for single dataset - geneSetDag <- list(geneSetDag) - names(geneSetDag) <- ifelse(is.character(enrichDatabase), enrichDatabase, gsub(".gmt", "", basename(enrichDatabaseFile), fixed=TRUE)) - } - for (name in names(geneSetDag)) { - dag <- geneSetDag[[name]] - if (is.null(dag)) { - #dagJson[[name]] <- list(NULL) - next - } - dagRes <- expandDag(enrichedSig$geneSet, dag) - dagEdges <- dagRes$edges - dagNodes <- getDagNodes(enrichedSig, dagRes$allNodes, geneSetDes, enrichMethod, dagColor) - dagJson[[name]] <- c(dagEdges, dagNodes) - } - } - } - } else { - ########### Organism is others. No mapping information ############# - ############# Summary for the analysis ################### - if (enrichMethod == "ORA") { - numAnnoRefUserId <- length(intersect(interestingGeneMap, - intersect(referenceGeneList, geneSet$gene))) - } - if (!is.null(enrichedSig) && reportNum < nrow(enrichedSig)) { - if (enrichMethod == "ORA") { - enrichedSig <- enrichedSig[1:reportNum, ] - } else if (enrichMethod == "GSEA") { - enrichedSig <- getTopGseaResults(enrichedSig, reportNum / 2)[[1]] - } - # Add representatives if they are not in top ReportNum. So could be more if ReportNum.is small and high redundancy in top - numRes <- nrow(enrichedSig) - enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$ap$representatives) - enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$wsc$representatives) - repAdded <- nrow(enrichedSig) > numRes - } + ############ Enrichment result ################## + if (!is.null(enrichedSig)) { + bodyContent <- paste(bodyContent, enrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters), seq = "\n") + if (!is.null(geneSetDag)) { + if (!is.vector(geneSetDag)) { + # for backward compatibility, it is unlisted for single dataset + geneSetDag <- list(geneSetDag) + names(geneSetDag) <- ifelse(is.character(enrichDatabase), enrichDatabase, gsub(".gmt", "", basename(enrichDatabaseFile), fixed = TRUE)) + } + for (name in names(geneSetDag)) { + dag <- geneSetDag[[name]] + if (is.null(dag)) { + # dagJson[[name]] <- list(NULL) + next + } + dagRes <- expandDag(enrichedSig$geneSet, dag) + dagEdges <- dagRes$edges + dagNodes <- getDagNodes(enrichedSig, dagRes$allNodes, geneSetDes, enrichMethod, dagColor) + dagJson[[name]] <- c(dagEdges, dagNodes) + } + } + } + } else { + ########### Organism is others. No mapping information ############# + ############# Summary for the analysis ################### + if (enrichMethod == "ORA") { + numAnnoRefUserId <- length(intersect( + interestingGeneMap, + intersect(referenceGeneList, geneSet$gene) + )) + } + if (!is.null(enrichedSig) && reportNum < nrow(enrichedSig)) { + if (enrichMethod == "ORA") { + enrichedSig <- enrichedSig[1:reportNum, ] + } else if (enrichMethod == "GSEA") { + enrichedSig <- getTopGseaResults(enrichedSig, reportNum / 2)[[1]] + } + # Add representatives if they are not in top ReportNum. So could be more if ReportNum.is small and high redundancy in top + numRes <- nrow(enrichedSig) + enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$ap$representatives) + enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$wsc$representatives) + repAdded <- nrow(enrichedSig) > numRes + } - bodyContent <- summaryDescription(projectName, organism, interestGeneFile, interestGene, interestGeneType, enrichMethod, enrichDatabase, enrichDatabaseFile, enrichDatabaseType, enrichDatabaseDescriptionFile, interestingGeneMap, referenceGeneList, referenceGeneFile, referenceGene, referenceGeneType, referenceSet, minNum, maxNum, sigMethod, fdrThr, topThr, fdrMethod, allEnrichedSig, reportNum, perNum, p, geneSet, repAdded, numAnnoRefUserId, hostName) + bodyContent <- summaryDescription(projectName, organism, interestGeneFile, interestGene, interestGeneType, enrichMethod, enrichDatabase, enrichDatabaseFile, enrichDatabaseType, enrichDatabaseDescriptionFile, interestingGeneMap, referenceGeneList, referenceGeneFile, referenceGene, referenceGeneType, referenceSet, minNum, maxNum, sigMethod, fdrThr, topThr, fdrMethod, allEnrichedSig, reportNum, perNum, p, geneSet, repAdded, numAnnoRefUserId, hostName) - ############## Enrich Result ################ - if (!is.null(enrichedSig)) { - bodyContent <- paste(bodyContent, enrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters), seq='\n') - } - standardId <- NULL - } - if (is.null(enrichedSig)) { - enrichedSig <- data.frame() - } - if (is.null(background)) { - background <- data.frame() - } - version <- packageVersion("WebGestaltR") - # use major and minor version numbers for JS lib. If API changes, version should be bumped - # patch number should not matter - version <- paste(version[1, 1], version[1, 2], sep = ".") - hasGeneSetDag = !is.null(geneSetDag) - hasCytoscape <- hasGeneSetDag || !is.null(geneSetNet) # DAG or network needs cytoscape - allDbNames <- unlist(c(enrichDatabase, unname(sapply(enrichDatabaseFile, function(x) { - gsub(".gmt", "", basename(x), fixed=TRUE) - })))) # sapply on NULL will return a list + ############## Enrich Result ################ + if (!is.null(enrichedSig)) { + bodyContent <- paste(bodyContent, enrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters), seq = "\n") + } + standardId <- NULL + } + if (is.null(enrichedSig)) { + enrichedSig <- data.frame() + } + if (is.null(background)) { + background <- data.frame() + } + version <- packageVersion("WebGestaltR") + # use major and minor version numbers for JS lib. If API changes, version should be bumped + # patch number should not matter + version <- paste(version[1, 1], version[1, 2], sep = ".") + hasGeneSetDag <- !is.null(geneSetDag) + hasCytoscape <- hasGeneSetDag || !is.null(geneSetNet) # DAG or network needs cytoscape + allDbNames <- unlist(c(enrichDatabase, unname(sapply(enrichDatabaseFile, function(x) { + gsub(".gmt", "", basename(x), fixed = TRUE) + })))) # sapply on NULL will return a list + tabs[[i]] <- list(title = listNames[i], bodyContent = bodyContent) + } + } - header <- readLines(system.file("templates/header.mustache", package="WebGestaltR")) - footer <- readLines(system.file("templates/footer.mustache", package="WebGestaltR")) - template <- readLines(system.file("templates/template.mustache", package="WebGestaltR")) - data <- list(hostName=hostName, bodyContent=bodyContent, - organism=organism, enrichDatabaseJson=toJSON(allDbNames, auto_unbox=TRUE), - sigJson=toJSON(enrichedSig, digits=16), insigJson=toJSON(background, digits=16), - dagJson=toJSON(dagJson, auto_unbox=TRUE), hasGeneSetDag=hasGeneSetDag, version=version, - clusterJson=toJSON(clusters), hasCytoscape=hasCytoscape, - geneTableJson=toJSON(geneTables), standardId=standardId, numAnnoRefUserId=numAnnoRefUserId, - methodIsGsea=enrichMethod=="GSEA", hasGeneSetDes=!is.null(geneSetDes) - ) - cat(whisker.render(template, data, partials=list(header=header, footer=footer)), file=outputHtmlFile) + allContent <- "\n" + for (i in seq_along(tabs)) { + allContent <- paste(allContent, "", tabs[[i]]$bodyContent, "\n", sep = "") + } + allContent <- paste(allContent, "\n", sep = "") + # tabs <- list(tabs = tabs) + print(toJSON(tabs)) + header <- readLines(system.file("templates/header.mustache", package = "WebGestaltR")) + footer <- readLines(system.file("templates/footer.mustache", package = "WebGestaltR")) + template <- readLines(system.file("templates/meta_template.mustache", package = "WebGestaltR")) + data <- list( + hostName = hostName, allContent = allContent, + organism = organism, enrichDatabaseJson = toJSON(allDbNames, auto_unbox = TRUE), + sigJson = toJSON(enrichedSig, digits = 16), insigJson = toJSON(background, digits = 16), + dagJson = toJSON(dagJson, auto_unbox = TRUE), hasGeneSetDag = hasGeneSetDag, version = version, + clusterJson = toJSON(clusters), hasCytoscape = hasCytoscape, + geneTableJson = toJSON(geneTables), standardId = standardId, numAnnoRefUserId = numAnnoRefUserId, + methodIsGsea = enrichMethod == "GSEA", hasGeneSetDes = !is.null(geneSetDes) + ) + cat(whisker.render(template, data, partials = list(header = header, footer = footer)), file = outputHtmlFile) } diff --git a/inst/templates/meta_template.mustache b/inst/templates/meta_template.mustache new file mode 100644 index 0000000..c479f0d --- /dev/null +++ b/inst/templates/meta_template.mustache @@ -0,0 +1,122 @@ + + + + + +WebGestalt (WEB-based GEne SeT AnaLysis Toolkit) + + + + + {{#hasCytoscape}} + + {{/hasCytoscape}} + {{^hasCytoscape}} + + {{/hasCytoscape}} + + + + + +{{>header}} +

+
+
+
+ {{{allContent}}} +
+
+
+ + +{{>footer}} + + diff --git a/man/createMetaReport.Rd b/man/createMetaReport.Rd index c07fcb5..436577e 100644 --- a/man/createMetaReport.Rd +++ b/man/createMetaReport.Rd @@ -10,28 +10,28 @@ createMetaReport( organism = "hsapiens", projectName, enrichMethod, - geneSet, - geneSetDes, - geneSetDag, - geneSetNet, - interestingGeneMap, - referenceGeneList, - enrichedSig, - geneTables, - clusters, - background, - enrichDatabase = NULL, - enrichDatabaseFile = NULL, - enrichDatabaseType = NULL, - enrichDatabaseDescriptionFile = NULL, - interestGeneFile = NULL, - interestGene = NULL, - interestGeneType = NULL, + geneSet_list, + geneSetDes_list, + geneSetDag_list, + geneSetNet_list, + interestingGeneMap_list, + referenceGeneList_list, + enrichedSig_list, + geneTables_list, + clusters_list, + background_list, + enrichDatabase_list = NULL, + enrichDatabaseFile_list = NULL, + enrichDatabaseType_list = NULL, + enrichDatabaseDescriptionFile_list = NULL, + interestGeneFile_list = NULL, + interestGene_list = NULL, + interestGeneType_list = NULL, collapseMethod = "mean", - referenceGeneFile = NULL, - referenceGene = NULL, - referenceGeneType = NULL, - referenceSet = NULL, + referenceGeneFile_list = NULL, + referenceGene_list = NULL, + referenceGeneType_list = NULL, + referenceSet_list = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", @@ -41,7 +41,8 @@ createMetaReport( reportNum = 20, perNum = 1000, p = 1, - dagColor = "binary" + dagColor = "binary", + listNames = NULL ) } \description{ From 2020f35cb234117c2c894778e40a92443b0e048f Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 4 Dec 2023 11:28:33 -0600 Subject: [PATCH 81/82] save progress of new dataset --- R/createMetaReport.R | 8 +- R/metaEnrichResultSection.R | 39 ++++++ .../enrichResultSection_meta.mustache | 128 ++++++++++++++++++ man/metaEnrichResultSection.Rd | 21 +++ 4 files changed, 192 insertions(+), 4 deletions(-) create mode 100644 R/metaEnrichResultSection.R create mode 100644 inst/templates/enrichResultSection_meta.mustache create mode 100644 man/metaEnrichResultSection.Rd diff --git a/R/createMetaReport.R b/R/createMetaReport.R index 08d5948..92b0444 100644 --- a/R/createMetaReport.R +++ b/R/createMetaReport.R @@ -26,7 +26,7 @@ createMetaReport <- function(hostName, outputDirectory, organism = "hsapiens", p if (i == 0) { } else { - print(paste("Processing dataset", i, "of", length(interestingGeneMap_list))) + print(paste("Processing dataset", i, "of", length(interestingGeneMap_list))) enrichedSig <- enrichedSig_list[[i + 1]] geneSet <- geneSet_list[[i]] if (!is.null(geneSetDes_list) && length(geneSetDes_list) > 0) { @@ -112,7 +112,7 @@ createMetaReport <- function(hostName, outputDirectory, organism = "hsapiens", p ############ Enrichment result ################## if (!is.null(enrichedSig)) { - bodyContent <- paste(bodyContent, enrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters), seq = "\n") + bodyContent <- paste(bodyContent, metaEnrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters, i), seq = "\n") if (!is.null(geneSetDag)) { if (!is.vector(geneSetDag)) { # for backward compatibility, it is unlisted for single dataset @@ -158,7 +158,7 @@ createMetaReport <- function(hostName, outputDirectory, organism = "hsapiens", p ############## Enrich Result ################ if (!is.null(enrichedSig)) { - bodyContent <- paste(bodyContent, enrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters), seq = "\n") + bodyContent <- paste(bodyContent, metaEnrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters, i), seq = "\n") } standardId <- NULL } @@ -183,7 +183,7 @@ createMetaReport <- function(hostName, outputDirectory, organism = "hsapiens", p allContent <- "\n" for (i in seq_along(tabs)) { - allContent <- paste(allContent, "", tabs[[i]]$bodyContent, "\n", sep = "") + allContent <- paste(allContent, "", tabs[[i]]$bodyContent, "\n", sep = "") } allContent <- paste(allContent, "\n", sep = "") # tabs <- list(tabs = tabs) diff --git a/R/metaEnrichResultSection.R b/R/metaEnrichResultSection.R new file mode 100644 index 0000000..90492c5 --- /dev/null +++ b/R/metaEnrichResultSection.R @@ -0,0 +1,39 @@ +#' metaEnrichResultSection +#' +#' Conditionally render template of main result section. Actual work is carried out in front end +#' +#' @importFrom whisker whisker.render +#' @importFrom dplyr select distinct filter +#' @importFrom jsonlite toJSON +#' @keywords internal +#' +metaEnrichResultSection <- function(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters, meta_id) { + if ("database" %in% colnames(geneSet)) { + # multiple databases + netDatabases <- names(geneSetNet[!sapply(geneSetNet, is.null)]) + setSource <- geneSet %>% + select(.data$geneSet, .data$database) %>% + distinct() %>% + filter(.data$geneSet %in% enrichedSig$geneSet) + setsWithNetJson <- toJSON((filter(setSource, .data$database %in% netDatabases))$geneSet) + hasGeneSetDag <- length(geneSetDag[!sapply(geneSetDag, is.null)]) > 0 + hasMultipleDatabases <- TRUE + } else { + setsWithNetJson <- toJSON(!is.null(geneSetNet), auto_unbox = TRUE) + hasGeneSetDag <- !is.null(geneSetDag) + hasMultipleDatabases <- FALSE + } + + data <- list( + methodIsOra = enrichMethod == "ORA", + hasGeneSetDag = hasGeneSetDag, + hasMultipleDatabases = hasMultipleDatabases, + setsWithNetJson = setsWithNetJson, + hasAp = !is.null(clusters$ap), + hasWsc = !is.null(clusters$wsc), + hasKmed = !is.null(clusters$km), + meta_id = meta_id + ) + template <- readLines(system.file("templates/enrichResultSection.mustache", package = "WebGestaltR")) + return(whisker.render(template, data)) +} diff --git a/inst/templates/enrichResultSection_meta.mustache b/inst/templates/enrichResultSection_meta.mustache new file mode 100644 index 0000000..fe594de --- /dev/null +++ b/inst/templates/enrichResultSection_meta.mustache @@ -0,0 +1,128 @@ +

Enrichment Results

+
+

+ + Redundancy reduction: + + None + {{#hasAp}} + + + Affinity Propagation + + + {{/hasAp}} + {{#hasKmed}} + + + k-Medoid + + + {{/hasKmed}} + {{#hasWsc}} + + + Weighted set cover + + + {{/hasWsc}} +

+ + + + + + {{#hasMultipleDatabases}} + + + {{! temp delimiter change to keep brackets for Vue}} + {{=<% %>=}} + + + <%={{ }}=%> + + + {{/hasMultipleDatabases}} +
+ + +
+
+ + {{#hasMultipleDatabases}} + + + {{! temp delimiter change to keep brackets for Vue}} + {{=<% %>=}} + + + <%={{ }}=%> + + + {{/hasMultipleDatabases}} +
+ +
+
+ {{#hasGeneSetDag}} + + + + {{! temp delimiter change to keep brackets for Vue}} + {{=<% %>=}} + + <%={{ }}=%> + + +
+ +
+
+ {{/hasGeneSetDag}} +
+ +
diff --git a/man/metaEnrichResultSection.Rd b/man/metaEnrichResultSection.Rd new file mode 100644 index 0000000..2073dd1 --- /dev/null +++ b/man/metaEnrichResultSection.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metaEnrichResultSection.R +\name{metaEnrichResultSection} +\alias{metaEnrichResultSection} +\title{metaEnrichResultSection} +\usage{ +metaEnrichResultSection( + enrichMethod, + enrichedSig, + geneSet, + geneSetDes, + geneSetDag, + geneSetNet, + clusters, + meta_id +) +} +\description{ +Conditionally render template of main result section. Actual work is carried out in front end +} +\keyword{internal} From 78c72e09091cc8028d10de8073258ceea087f6a8 Mon Sep 17 00:00:00 2001 From: John Elizarraras Date: Mon, 4 Dec 2023 15:00:58 -0600 Subject: [PATCH 82/82] working version of iFrames --- R/createMetaReport.R | 134 ++-------- R/createReport.R | 248 ++++++++++-------- R/metaEnrichResultSection.R | 3 +- inst/templates/meta_partial_template.mustache | 118 +++++++++ inst/templates/meta_template.mustache | 105 +------- man/createReport.Rd | 4 +- man/metaEnrichResultSection.Rd | 3 +- 7 files changed, 298 insertions(+), 317 deletions(-) create mode 100644 inst/templates/meta_partial_template.mustache diff --git a/R/createMetaReport.R b/R/createMetaReport.R index 92b0444..5ae5081 100644 --- a/R/createMetaReport.R +++ b/R/createMetaReport.R @@ -15,7 +15,10 @@ createMetaReport <- function(hostName, outputDirectory, organism = "hsapiens", p referenceGeneType_list = NULL, referenceSet_list = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10, reportNum = 20, perNum = 1000, p = 1, dagColor = "binary", listNames = NULL) { outputHtmlFile <- file.path(outputDirectory, paste0("Project_", projectName), paste0("Report_", projectName, ".html")) - + version <- packageVersion("WebGestaltR") + # use major and minor version numbers for JS lib. If API changes, version should be bumped + # patch number should not matter + version <- paste(version[1, 1], version[1, 2], sep = ".") # if hostname starts with "file://", it is used as WebGestaltReporter if (startsWith(hostName, "file://")) { # change back hostName for web assets and browsers will cache it. @@ -78,112 +81,31 @@ createMetaReport <- function(hostName, outputDirectory, organism = "hsapiens", p referenceSet <- referenceSet_list[[i]] numAnnoRefUserId <- NULL dagJson <- list() - allEnrichedSig <- enrichedSig - repAdded <- FALSE - if (organism != "others") { - if (!is.null(enrichedSig) && reportNum < nrow(enrichedSig)) { - if (enrichMethod == "ORA") { - enrichedSig <- enrichedSig[1:reportNum, ] - } else if (enrichMethod == "GSEA") { - enrichedSig <- getTopGseaResults(enrichedSig, reportNum / 2)[[1]] - } - # Add representatives if they are not in top ReportNum. So could be more if ReportNum.is small and high redundancy in top - numRes <- nrow(enrichedSig) - enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$ap$representatives) - enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$wsc$representatives) - repAdded <- nrow(enrichedSig) > numRes - } - standardId <- interestingGeneMap$standardId - if (enrichMethod == "ORA") { - interestGeneList <- unique(interestingGeneMap$mapped[[standardId]]) - numAnnoRefUserId <- length(intersect( - interestGeneList, - intersect(referenceGeneList, geneSet$gene) - )) - } - - ##### Summary Tab ######## - bodyContent <- summaryDescription(projectName, organism, interestGeneFile, interestGene, interestGeneType, enrichMethod, enrichDatabase, enrichDatabaseFile, enrichDatabaseType, enrichDatabaseDescriptionFile, interestingGeneMap, referenceGeneList, referenceGeneFile, referenceGene, referenceGeneType, referenceSet, minNum, maxNum, sigMethod, fdrThr, topThr, fdrMethod, allEnrichedSig, reportNum, perNum, p, geneSet, repAdded, numAnnoRefUserId, hostName) - - ########### GOSlim summary ######################### - if (standardId == "entrezgene") { - bodyContent <- paste(bodyContent, goSlimReport(projectName), sep = "\n") - } - - ############ Enrichment result ################## - if (!is.null(enrichedSig)) { - bodyContent <- paste(bodyContent, metaEnrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters, i), seq = "\n") - if (!is.null(geneSetDag)) { - if (!is.vector(geneSetDag)) { - # for backward compatibility, it is unlisted for single dataset - geneSetDag <- list(geneSetDag) - names(geneSetDag) <- ifelse(is.character(enrichDatabase), enrichDatabase, gsub(".gmt", "", basename(enrichDatabaseFile), fixed = TRUE)) - } - for (name in names(geneSetDag)) { - dag <- geneSetDag[[name]] - if (is.null(dag)) { - # dagJson[[name]] <- list(NULL) - next - } - dagRes <- expandDag(enrichedSig$geneSet, dag) - dagEdges <- dagRes$edges - dagNodes <- getDagNodes(enrichedSig, dagRes$allNodes, geneSetDes, enrichMethod, dagColor) - dagJson[[name]] <- c(dagEdges, dagNodes) - } - } - } - } else { - ########### Organism is others. No mapping information ############# - ############# Summary for the analysis ################### - if (enrichMethod == "ORA") { - numAnnoRefUserId <- length(intersect( - interestingGeneMap, - intersect(referenceGeneList, geneSet$gene) - )) - } - if (!is.null(enrichedSig) && reportNum < nrow(enrichedSig)) { - if (enrichMethod == "ORA") { - enrichedSig <- enrichedSig[1:reportNum, ] - } else if (enrichMethod == "GSEA") { - enrichedSig <- getTopGseaResults(enrichedSig, reportNum / 2)[[1]] - } - # Add representatives if they are not in top ReportNum. So could be more if ReportNum.is small and high redundancy in top - numRes <- nrow(enrichedSig) - enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$ap$representatives) - enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$wsc$representatives) - repAdded <- nrow(enrichedSig) > numRes - } - - bodyContent <- summaryDescription(projectName, organism, interestGeneFile, interestGene, interestGeneType, enrichMethod, enrichDatabase, enrichDatabaseFile, enrichDatabaseType, enrichDatabaseDescriptionFile, interestingGeneMap, referenceGeneList, referenceGeneFile, referenceGene, referenceGeneType, referenceSet, minNum, maxNum, sigMethod, fdrThr, topThr, fdrMethod, allEnrichedSig, reportNum, perNum, p, geneSet, repAdded, numAnnoRefUserId, hostName) - - ############## Enrich Result ################ - if (!is.null(enrichedSig)) { - bodyContent <- paste(bodyContent, metaEnrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters, i), seq = "\n") - } - standardId <- NULL - } - if (is.null(enrichedSig)) { - enrichedSig <- data.frame() - } - if (is.null(background)) { - background <- data.frame() - } - version <- packageVersion("WebGestaltR") - # use major and minor version numbers for JS lib. If API changes, version should be bumped - # patch number should not matter - version <- paste(version[1, 1], version[1, 2], sep = ".") - hasGeneSetDag <- !is.null(geneSetDag) - hasCytoscape <- hasGeneSetDag || !is.null(geneSetNet) # DAG or network needs cytoscape - allDbNames <- unlist(c(enrichDatabase, unname(sapply(enrichDatabaseFile, function(x) { - gsub(".gmt", "", basename(x), fixed = TRUE) - })))) # sapply on NULL will return a list - tabs[[i]] <- list(title = listNames[i], bodyContent = bodyContent) + relative_path <- paste0("./Report_", projectName, "_", i, ".html") + partial_output <- file.path(outputDirectory, paste0("Project_", projectName), paste0("Report_", projectName, "_", i, ".html")) + createReport( + hostName = hostName, outputDirectory = outputDirectory, organism = organism, + projectName = projectName, enrichMethod = enrichMethod, geneSet = geneSet, + geneSetDes = geneSetDes, geneSetDag = geneSetDag, geneSetNet = geneSetNet, + interestingGeneMap = interestingGeneMap, referenceGeneList = referenceGeneList, + enrichedSig = enrichedSig, background = background, geneTables = geneTables, + clusters = clusters, enrichDatabase = enrichDatabase, + enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType, + enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, + interestGeneFile = interestGeneFile, interestGene = interestGene, + interestGeneType = interestGeneType, collapseMethod = collapseMethod, + referenceGeneFile = referenceGeneFile, referenceGene = referenceGene, + referenceGeneType = referenceGeneType, referenceSet = referenceSet, minNum = minNum, + maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr, + topThr = topThr, reportNum = reportNum, dagColor = dagColor, outputHtmlFile = partial_output, is_meta = TRUE + ) + tabs[[i]] <- list(title = listNames[[i]], path = relative_path) } } allContent <- "\n" for (i in seq_along(tabs)) { - allContent <- paste(allContent, "", tabs[[i]]$bodyContent, "\n", sep = "") + allContent <- paste(allContent, "\n", sep = "") } allContent <- paste(allContent, "\n", sep = "") # tabs <- list(tabs = tabs) @@ -192,13 +114,7 @@ createMetaReport <- function(hostName, outputDirectory, organism = "hsapiens", p footer <- readLines(system.file("templates/footer.mustache", package = "WebGestaltR")) template <- readLines(system.file("templates/meta_template.mustache", package = "WebGestaltR")) data <- list( - hostName = hostName, allContent = allContent, - organism = organism, enrichDatabaseJson = toJSON(allDbNames, auto_unbox = TRUE), - sigJson = toJSON(enrichedSig, digits = 16), insigJson = toJSON(background, digits = 16), - dagJson = toJSON(dagJson, auto_unbox = TRUE), hasGeneSetDag = hasGeneSetDag, version = version, - clusterJson = toJSON(clusters), hasCytoscape = hasCytoscape, - geneTableJson = toJSON(geneTables), standardId = standardId, numAnnoRefUserId = numAnnoRefUserId, - methodIsGsea = enrichMethod == "GSEA", hasGeneSetDes = !is.null(geneSetDes) + hostName = hostName, allContent = allContent, version = version ) cat(whisker.render(template, data, partials = list(header = header, footer = footer)), file = outputHtmlFile) } diff --git a/R/createReport.R b/R/createReport.R index 55db6b3..25164ad 100644 --- a/R/createReport.R +++ b/R/createReport.R @@ -7,123 +7,147 @@ #' #' @keywords internal #' -createReport <- function(hostName, outputDirectory, organism="hsapiens", projectName, enrichMethod, geneSet, geneSetDes, geneSetDag, geneSetNet, interestingGeneMap, referenceGeneList, enrichedSig, geneTables, clusters, background, enrichDatabase=NULL, enrichDatabaseFile=NULL, enrichDatabaseType=NULL, enrichDatabaseDescriptionFile=NULL, interestGeneFile=NULL, interestGene=NULL, interestGeneType=NULL, collapseMethod="mean", referenceGeneFile=NULL, referenceGene=NULL, referenceGeneType=NULL, referenceSet=NULL, minNum=10, maxNum=500, fdrMethod="BH", sigMethod="fdr", fdrThr=0.05, topThr=10, reportNum=20, perNum=1000, p=1, dagColor="binary") { - outputHtmlFile <- file.path(outputDirectory, paste0("Project_", projectName), paste0("Report_", projectName, ".html")) +createReport <- function(hostName, outputDirectory, organism = "hsapiens", projectName, enrichMethod, geneSet, geneSetDes, geneSetDag, geneSetNet, + interestingGeneMap, referenceGeneList, enrichedSig, geneTables, clusters, background, enrichDatabase = NULL, enrichDatabaseFile = NULL, enrichDatabaseType = NULL, + enrichDatabaseDescriptionFile = NULL, interestGeneFile = NULL, interestGene = NULL, interestGeneType = NULL, collapseMethod = "mean", referenceGeneFile = NULL, + referenceGene = NULL, referenceGeneType = NULL, referenceSet = NULL, minNum = 10, maxNum = 500, fdrMethod = "BH", sigMethod = "fdr", fdrThr = 0.05, topThr = 10, + reportNum = 20, perNum = 1000, p = 1, dagColor = "binary", is_meta = FALSE, outputHtmlFile = NULL) { + if (is.null(outputHtmlFile)) { + outputHtmlFile <- file.path(outputDirectory, paste0("Project_", projectName), paste0("Report_", projectName, ".html")) + } + # if hostname starts with "file://", it is used as WebGestaltReporter + if (startsWith(hostName, "file://")) { + # change back hostName for web assets and browsers will cache it. + hostName <- "https://www.webgestalt.org" + } - # if hostname starts with "file://", it is used as WebGestaltReporter - if (startsWith(hostName, "file://")) { - # change back hostName for web assets and browsers will cache it. - hostName <- "https://www.webgestalt.org" - } + numAnnoRefUserId <- NULL + dagJson <- list() + allEnrichedSig <- enrichedSig + repAdded <- FALSE + if (organism != "others") { + if (!is.null(enrichedSig) && reportNum < nrow(enrichedSig)) { + if (enrichMethod == "ORA") { + enrichedSig <- enrichedSig[1:reportNum, ] + } else if (enrichMethod == "GSEA") { + enrichedSig <- getTopGseaResults(enrichedSig, reportNum / 2)[[1]] + } + # Add representatives if they are not in top ReportNum. So could be more if ReportNum.is small and high redundancy in top + numRes <- nrow(enrichedSig) + enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$ap$representatives) + enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$wsc$representatives) + repAdded <- nrow(enrichedSig) > numRes + } + standardId <- interestingGeneMap$standardId + if (enrichMethod == "ORA") { + interestGeneList <- unique(interestingGeneMap$mapped[[standardId]]) + numAnnoRefUserId <- length(intersect( + interestGeneList, + intersect(referenceGeneList, geneSet$gene) + )) + } - numAnnoRefUserId <- NULL - dagJson <- list() - allEnrichedSig <- enrichedSig - repAdded <- FALSE - if (organism!="others") { - if (!is.null(enrichedSig) && reportNum < nrow(enrichedSig)) { - if (enrichMethod == "ORA") { - enrichedSig <- enrichedSig[1:reportNum, ] - } else if (enrichMethod == "GSEA") { - enrichedSig <- getTopGseaResults(enrichedSig, reportNum / 2)[[1]] - } - # Add representatives if they are not in top ReportNum. So could be more if ReportNum.is small and high redundancy in top - numRes <- nrow(enrichedSig) - enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$ap$representatives) - enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$wsc$representatives) - repAdded <- nrow(enrichedSig) > numRes - } - standardId <- interestingGeneMap$standardId - if (enrichMethod == 'ORA') { - interestGeneList <- unique(interestingGeneMap$mapped[[standardId]]) - numAnnoRefUserId <- length(intersect(interestGeneList, - intersect(referenceGeneList, geneSet$gene))) - } + ##### Summary Tab ######## + bodyContent <- summaryDescription(projectName, organism, interestGeneFile, interestGene, interestGeneType, enrichMethod, enrichDatabase, enrichDatabaseFile, enrichDatabaseType, enrichDatabaseDescriptionFile, interestingGeneMap, referenceGeneList, referenceGeneFile, referenceGene, referenceGeneType, referenceSet, minNum, maxNum, sigMethod, fdrThr, topThr, fdrMethod, allEnrichedSig, reportNum, perNum, p, geneSet, repAdded, numAnnoRefUserId, hostName) - ##### Summary Tab ######## - bodyContent <- summaryDescription(projectName, organism, interestGeneFile, interestGene, interestGeneType, enrichMethod, enrichDatabase, enrichDatabaseFile, enrichDatabaseType, enrichDatabaseDescriptionFile, interestingGeneMap, referenceGeneList, referenceGeneFile, referenceGene, referenceGeneType, referenceSet, minNum, maxNum, sigMethod, fdrThr, topThr, fdrMethod, allEnrichedSig, reportNum, perNum, p, geneSet, repAdded, numAnnoRefUserId, hostName) + ########### GOSlim summary ######################### + if (standardId == "entrezgene") { + bodyContent <- paste(bodyContent, goSlimReport(projectName), sep = "\n") + } - ########### GOSlim summary ######################### - if(standardId=="entrezgene"){ - bodyContent <- paste(bodyContent, goSlimReport(projectName), sep='\n') - } + ############ Enrichment result ################## + if (!is.null(enrichedSig)) { + bodyContent <- paste(bodyContent, enrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters), seq = "\n") + if (!is.null(geneSetDag)) { + if (!is.vector(geneSetDag)) { + # for backward compatibility, it is unlisted for single dataset + geneSetDag <- list(geneSetDag) + names(geneSetDag) <- ifelse(is.character(enrichDatabase), enrichDatabase, gsub(".gmt", "", basename(enrichDatabaseFile), fixed = TRUE)) + } + for (name in names(geneSetDag)) { + dag <- geneSetDag[[name]] + if (is.null(dag)) { + # dagJson[[name]] <- list(NULL) + next + } + dagRes <- expandDag(enrichedSig$geneSet, dag) + dagEdges <- dagRes$edges + dagNodes <- getDagNodes(enrichedSig, dagRes$allNodes, geneSetDes, enrichMethod, dagColor) + dagJson[[name]] <- c(dagEdges, dagNodes) + } + } + } + } else { + ########### Organism is others. No mapping information ############# + ############# Summary for the analysis ################### + if (enrichMethod == "ORA") { + numAnnoRefUserId <- length(intersect( + interestingGeneMap, + intersect(referenceGeneList, geneSet$gene) + )) + } + if (!is.null(enrichedSig) && reportNum < nrow(enrichedSig)) { + if (enrichMethod == "ORA") { + enrichedSig <- enrichedSig[1:reportNum, ] + } else if (enrichMethod == "GSEA") { + enrichedSig <- getTopGseaResults(enrichedSig, reportNum / 2)[[1]] + } + # Add representatives if they are not in top ReportNum. So could be more if ReportNum.is small and high redundancy in top + numRes <- nrow(enrichedSig) + enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$ap$representatives) + enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$wsc$representatives) + repAdded <- nrow(enrichedSig) > numRes + } - ############ Enrichment result ################## - if (!is.null(enrichedSig)) { - bodyContent <- paste(bodyContent, enrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters), seq='\n') - if (!is.null(geneSetDag)) { - if (!is.vector(geneSetDag)) { - # for backward compatibility, it is unlisted for single dataset - geneSetDag <- list(geneSetDag) - names(geneSetDag) <- ifelse(is.character(enrichDatabase), enrichDatabase, gsub(".gmt", "", basename(enrichDatabaseFile), fixed=TRUE)) - } - for (name in names(geneSetDag)) { - dag <- geneSetDag[[name]] - if (is.null(dag)) { - #dagJson[[name]] <- list(NULL) - next - } - dagRes <- expandDag(enrichedSig$geneSet, dag) - dagEdges <- dagRes$edges - dagNodes <- getDagNodes(enrichedSig, dagRes$allNodes, geneSetDes, enrichMethod, dagColor) - dagJson[[name]] <- c(dagEdges, dagNodes) - } - } - } - } else { - ########### Organism is others. No mapping information ############# - ############# Summary for the analysis ################### - if (enrichMethod == 'ORA') { - numAnnoRefUserId <- length(intersect(interestingGeneMap, - intersect(referenceGeneList, geneSet$gene))) - } - if (!is.null(enrichedSig) && reportNum < nrow(enrichedSig)) { - if (enrichMethod == "ORA") { - enrichedSig <- enrichedSig[1:reportNum, ] - } else if (enrichMethod == "GSEA") { - enrichedSig <- getTopGseaResults(enrichedSig, reportNum / 2)[[1]] - } - # Add representatives if they are not in top ReportNum. So could be more if ReportNum.is small and high redundancy in top - numRes <- nrow(enrichedSig) - enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$ap$representatives) - enrichedSig <- keepRep(enrichedSig, allEnrichedSig, clusters$wsc$representatives) - repAdded <- nrow(enrichedSig) > numRes - } + bodyContent <- summaryDescription(projectName, organism, interestGeneFile, interestGene, interestGeneType, enrichMethod, enrichDatabase, enrichDatabaseFile, enrichDatabaseType, enrichDatabaseDescriptionFile, interestingGeneMap, referenceGeneList, referenceGeneFile, referenceGene, referenceGeneType, referenceSet, minNum, maxNum, sigMethod, fdrThr, topThr, fdrMethod, allEnrichedSig, reportNum, perNum, p, geneSet, repAdded, numAnnoRefUserId, hostName) - bodyContent <- summaryDescription(projectName, organism, interestGeneFile, interestGene, interestGeneType, enrichMethod, enrichDatabase, enrichDatabaseFile, enrichDatabaseType, enrichDatabaseDescriptionFile, interestingGeneMap, referenceGeneList, referenceGeneFile, referenceGene, referenceGeneType, referenceSet, minNum, maxNum, sigMethod, fdrThr, topThr, fdrMethod, allEnrichedSig, reportNum, perNum, p, geneSet, repAdded, numAnnoRefUserId, hostName) + ############## Enrich Result ################ + if (!is.null(enrichedSig)) { + bodyContent <- paste(bodyContent, enrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters), seq = "\n") + } + standardId <- NULL + } + if (is.null(enrichedSig)) { + enrichedSig <- data.frame() + } + if (is.null(background)) { + background <- data.frame() + } + version <- packageVersion("WebGestaltR") + # use major and minor version numbers for JS lib. If API changes, version should be bumped + # patch number should not matter + version <- paste(version[1, 1], version[1, 2], sep = ".") + hasGeneSetDag <- !is.null(geneSetDag) + hasCytoscape <- hasGeneSetDag || !is.null(geneSetNet) # DAG or network needs cytoscape + allDbNames <- unlist(c(enrichDatabase, unname(sapply(enrichDatabaseFile, function(x) { + gsub(".gmt", "", basename(x), fixed = TRUE) + })))) # sapply on NULL will return a list - ############## Enrich Result ################ - if (!is.null(enrichedSig)) { - bodyContent <- paste(bodyContent, enrichResultSection(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters), seq='\n') - } - standardId <- NULL - } - if (is.null(enrichedSig)) { - enrichedSig <- data.frame() - } - if (is.null(background)) { - background <- data.frame() - } - version <- packageVersion("WebGestaltR") - # use major and minor version numbers for JS lib. If API changes, version should be bumped - # patch number should not matter - version <- paste(version[1, 1], version[1, 2], sep=".") - hasGeneSetDag = !is.null(geneSetDag) - hasCytoscape <- hasGeneSetDag || !is.null(geneSetNet) # DAG or network needs cytoscape - allDbNames <- unlist(c(enrichDatabase, unname(sapply(enrichDatabaseFile, function(x) { - gsub(".gmt", "", basename(x), fixed=TRUE) - })))) # sapply on NULL will return a list - - header <- readLines(system.file("templates/header.mustache", package="WebGestaltR")) - footer <- readLines(system.file("templates/footer.mustache", package="WebGestaltR")) - template <- readLines(system.file("templates/template.mustache", package="WebGestaltR")) - data <- list(hostName=hostName, bodyContent=bodyContent, - organism=organism, enrichDatabaseJson=toJSON(allDbNames, auto_unbox=TRUE), - sigJson=toJSON(enrichedSig, digits=16), insigJson=toJSON(background, digits=16), - dagJson=toJSON(dagJson, auto_unbox=TRUE), hasGeneSetDag=hasGeneSetDag, version=version, - clusterJson=toJSON(clusters), hasCytoscape=hasCytoscape, - geneTableJson=toJSON(geneTables), standardId=standardId, numAnnoRefUserId=numAnnoRefUserId, - methodIsGsea=enrichMethod=="GSEA", hasGeneSetDes=!is.null(geneSetDes) - ) - cat(whisker.render(template, data, partials=list(header=header, footer=footer)), file=outputHtmlFile) + if (is_meta) { + template <- readLines(system.file("templates/meta_partial_template.mustache", package = "WebGestaltR")) + data <- list( + hostName = hostName, bodyContent = bodyContent, + organism = organism, enrichDatabaseJson = toJSON(allDbNames, auto_unbox = TRUE), + sigJson = toJSON(enrichedSig, digits = 16), insigJson = toJSON(background, digits = 16), + dagJson = toJSON(dagJson, auto_unbox = TRUE), hasGeneSetDag = hasGeneSetDag, version = version, + clusterJson = toJSON(clusters), hasCytoscape = hasCytoscape, + geneTableJson = toJSON(geneTables), standardId = standardId, numAnnoRefUserId = numAnnoRefUserId, + methodIsGsea = enrichMethod == "GSEA", hasGeneSetDes = !is.null(geneSetDes) + ) + cat(whisker.render(template, data), file = outputHtmlFile) + } else { + header <- readLines(system.file("templates/header.mustache", package = "WebGestaltR")) + footer <- readLines(system.file("templates/footer.mustache", package = "WebGestaltR")) + template <- readLines(system.file("templates/template.mustache", package = "WebGestaltR")) + data <- list( + hostName = hostName, bodyContent = bodyContent, + organism = organism, enrichDatabaseJson = toJSON(allDbNames, auto_unbox = TRUE), + sigJson = toJSON(enrichedSig, digits = 16), insigJson = toJSON(background, digits = 16), + dagJson = toJSON(dagJson, auto_unbox = TRUE), hasGeneSetDag = hasGeneSetDag, version = version, + clusterJson = toJSON(clusters), hasCytoscape = hasCytoscape, + geneTableJson = toJSON(geneTables), standardId = standardId, numAnnoRefUserId = numAnnoRefUserId, + methodIsGsea = enrichMethod == "GSEA", hasGeneSetDes = !is.null(geneSetDes) + ) + cat(whisker.render(template, data, partials = list(header = header, footer = footer)), file = outputHtmlFile) + } } diff --git a/R/metaEnrichResultSection.R b/R/metaEnrichResultSection.R index 90492c5..e1471a5 100644 --- a/R/metaEnrichResultSection.R +++ b/R/metaEnrichResultSection.R @@ -7,7 +7,7 @@ #' @importFrom jsonlite toJSON #' @keywords internal #' -metaEnrichResultSection <- function(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters, meta_id) { +metaEnrichResultSection <- function(enrichMethod, enrichedSig, geneSet, geneSetDes, geneSetDag, geneSetNet, clusters) { if ("database" %in% colnames(geneSet)) { # multiple databases netDatabases <- names(geneSetNet[!sapply(geneSetNet, is.null)]) @@ -32,7 +32,6 @@ metaEnrichResultSection <- function(enrichMethod, enrichedSig, geneSet, geneSetD hasAp = !is.null(clusters$ap), hasWsc = !is.null(clusters$wsc), hasKmed = !is.null(clusters$km), - meta_id = meta_id ) template <- readLines(system.file("templates/enrichResultSection.mustache", package = "WebGestaltR")) return(whisker.render(template, data)) diff --git a/inst/templates/meta_partial_template.mustache b/inst/templates/meta_partial_template.mustache new file mode 100644 index 0000000..2c435ab --- /dev/null +++ b/inst/templates/meta_partial_template.mustache @@ -0,0 +1,118 @@ + + + + + +WebGestalt (WEB-based GEne SeT AnaLysis Toolkit) + + + + + {{#hasCytoscape}} + + {{/hasCytoscape}} + {{^hasCytoscape}} + + {{/hasCytoscape}} + + + + + +
+
+
+
+{{{bodyContent}}} +
+
+
+ + + + diff --git a/inst/templates/meta_template.mustache b/inst/templates/meta_template.mustache index c479f0d..b19b5fc 100644 --- a/inst/templates/meta_template.mustache +++ b/inst/templates/meta_template.mustache @@ -2,6 +2,7 @@ + WebGestalt (WEB-based GEne SeT AnaLysis Toolkit) @@ -14,108 +15,30 @@ ga('send', 'pageview'); - {{#hasCytoscape}} - - {{/hasCytoscape}} - {{^hasCytoscape}} - - {{/hasCytoscape}} + {{>header}}
-
-
- {{{allContent}}} +
+{{{allContent}}}
- - + {{>footer}} diff --git a/man/createReport.Rd b/man/createReport.Rd index 4116011..ddb06b9 100644 --- a/man/createReport.Rd +++ b/man/createReport.Rd @@ -41,7 +41,9 @@ createReport( reportNum = 20, perNum = 1000, p = 1, - dagColor = "binary" + dagColor = "binary", + is_meta = FALSE, + outputHtmlFile = NULL ) } \description{ diff --git a/man/metaEnrichResultSection.Rd b/man/metaEnrichResultSection.Rd index 2073dd1..5420361 100644 --- a/man/metaEnrichResultSection.Rd +++ b/man/metaEnrichResultSection.Rd @@ -11,8 +11,7 @@ metaEnrichResultSection( geneSetDes, geneSetDag, geneSetNet, - clusters, - meta_id + clusters ) } \description{