Skip to content

Commit

Permalink
Merge pull request #34 from microbiomeDB/internal-diff-abund
Browse files Browse the repository at this point in the history
improve diff abund api
  • Loading branch information
d-callan authored Apr 30, 2024
2 parents 20dbe80 + b37e55b commit 59595b2
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 19 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ importFrom(microbiomeComputations,Comparator)
importFrom(microbiomeComputations,DifferentialAbundanceResult)
importFrom(microbiomeComputations,alphaDiv)
importFrom(microbiomeComputations,betaDiv)
importFrom(microbiomeComputations,differentialAbundance)
importFrom(microbiomeComputations,internalDiffAbund)
importFrom(microbiomeComputations,rankedAbundance)
importFrom(microbiomeComputations,selfCorrelation)
importFrom(phyloseq,phyloseq)
Expand Down
73 changes: 67 additions & 6 deletions R/method-differentialAbundance.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,16 @@ buildBinaryComparator <- function(covariate, groupAValue, groupBValue) {
#' verbose=FALSE
#' )
#'
#' ## this case of categorical variables also has a 'short cut'
#' diffAbundOutput <- MicrobiomeDB::differentialAbundance(
#' getCollection(microbiomeData::DiabImmune, '16S (V4) Genus'),
#' "country",
#' groupA = "Russia",
#' groupB = c("Finland","Estonia"),
#' method='Maaslin2',
#' verbose=FALSE
#' )
#'
#' ## a categorical variable with 2 values
#' diffAbundOutput <- MicrobiomeDB::differentialAbundance(
#' getCollection(microbiomeData::DiabImmune, '16S (V4) Genus'),
Expand All @@ -105,15 +115,17 @@ buildBinaryComparator <- function(covariate, groupAValue, groupBValue) {
#' @param covariate character vector giving the name of a metadata variable of interest. If this
#' variable has only two values, you do not need to provide functions for arguments `groupA` and `groupB`.
#' @param groupA A function that takes a vector of values and returns TRUE or FALSE for each value. This will
#' be used to assign samples to groupA.
#' be used to assign samples to groupA. In the specific case of `covariate` being a categorical variable,
#' this can be a character vector of values to be included in groupA.
#' @param groupB A function that takes a vector of values and returns TRUE or FALSE for each value. This will
#' be used to assign samples to groupB. If not provided, groupB will be the complement of groupA.
#' be used to assign samples to groupB. If not provided, groupB will be the complement of groupA. In the specific
#' case of `covariate` being a categorical variable, this can be a character vector of values to be included in groupB.
#' @param method string defining the the differential abundance method. Accepted values are 'DESeq2' and 'Maaslin2'.
#' Default is 'Maaslin2', as 'DESeq2' only supports counts.
#' @param verbose boolean indicating if timed logging is desired
#' @return ComputeResult object
#' @rdname differentialAbundance-methods
#' @importFrom microbiomeComputations differentialAbundance Comparator
#' @importFrom microbiomeComputations internalDiffAbund Comparator
#' @export
setGeneric("differentialAbundance",
function(data, covariate, groupA, groupB, method = c("Maaslin2", "DESeq2"), verbose = c(TRUE, FALSE)) {
Expand All @@ -137,7 +149,7 @@ function(data, covariate, groupA, groupB, method = c("Maaslin2", "DESeq2"), verb
groupBLabel <- unique(data@sampleMetadata@data[[covariate]])[2]
comparator <- buildBinaryComparator(covariate, groupALabel, groupBLabel)

return(microbiomeComputations::differentialAbundance(data, comparator = comparator, method = method, verbose = verbose))
return(microbiomeComputations::internalDiffAbund(data, comparator = comparator, method = method, verbose = verbose))
})

#' @rdname differentialAbundance-methods
Expand All @@ -154,7 +166,29 @@ function(data, covariate, groupA, groupB, method = c("Maaslin2", "DESeq2"), verb
data@sampleMetadata@data[[covariate]] <- assignToBinaryGroups(data@sampleMetadata@data[[covariate]], groupA, NULL)
comparator <- buildBinaryComparator(covariate, 'groupA', 'groupB')

return(microbiomeComputations::differentialAbundance(data, comparator = comparator, method = method, verbose = verbose))
return(microbiomeComputations::internalDiffAbund(data, comparator = comparator, method = method, verbose = verbose))
})

#' @rdname differentialAbundance-methods
#' @aliases differentialAbundance,CollectionWithMetadata,character,character,missingOrNULL-method
setMethod("differentialAbundance", signature("CollectionWithMetadata", "character", "character", "missingOrNULL"),
function(data, covariate, groupA, groupB, method = c("Maaslin2", "DESeq2"), verbose = c(TRUE, FALSE)) {
method <- veupathUtils::matchArg(method)
verbose <- veupathUtils::matchArg(verbose)

covariateDataType <- class(data@sampleMetadata@data[[covariate]])
if (!covariateDataType %in% c("factor", "character")) {
stop("Argument 'groupA' must be a function when the variable specified by 'covariate' is not a factor or character")
}
covariateUniqueValues <- unique(data@sampleMetadata@data[[covariate]])
if (!any(groupA %in% covariateUniqueValues)) {
# should warn the specified values arent in the covariate
warning("Specified values in 'groupA' are not in the variable specified by 'covariate'")
}

groupAFxn <- function(x) {x %in% groupA}

return(differentialAbundance(data, covariate, groupAFxn, groupB, method, verbose))
})

#' @rdname differentialAbundance-methods
Expand All @@ -172,7 +206,34 @@ function(data, covariate, groupA, groupB, method = c("Maaslin2", "DESeq2"), verb
comparator <- buildBinaryComparator(covariate, 'groupA', 'groupB')

## microbiomeComputations will remove for us rows not in either group, and provide validation
return(microbiomeComputations::differentialAbundance(data, comparator = comparator, method = method, verbose = verbose))
return(microbiomeComputations::internalDiffAbund(data, comparator = comparator, method = method, verbose = verbose))
})

#' @rdname differentialAbundance-methods
#' @aliases differentialAbundance,CollectionWithMetadata,character,character,character-method
setMethod("differentialAbundance", signature("CollectionWithMetadata", "character", "character", "character"),
function(data, covariate, groupA, groupB, method = c("Maaslin2", "DESeq2"), verbose = c(TRUE, FALSE)) {
method <- veupathUtils::matchArg(method)
verbose <- veupathUtils::matchArg(verbose)

covariateDataType <- class(data@sampleMetadata@data[[covariate]])
if (!covariateDataType %in% c("factor", "character")) {
stop("Arguments 'groupA' and 'groupB' must be functions when 'covariate' is not a factor or character")
}
covariateUniqueValues <- unique(data@sampleMetadata@data[[covariate]])
if (!any(groupA %in% covariateUniqueValues)) {
# should warn the specified values arent in the covariate
warning("Specified values in 'groupA' are not in the variable specified by 'covariate'")
}
if (!any(groupB %in% covariateUniqueValues)) {
# should warn the specified values arent in the covariate
warning("Specified values in 'groupB' are not in the variable specified by 'covariate'")
}

groupAFxn <- function(x) {x %in% groupA}
groupBFxn <- function(x) {x %in% groupB}

return(differentialAbundance(data, covariate, groupAFxn, groupBFxn, method, verbose))
})

#### NOTE: While i think its important for people to be able to recreate the computes from the site, and tried to make
Expand Down
36 changes: 34 additions & 2 deletions man/differentialAbundance-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-GetComputeResult.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ test_that("we can get compute results in different formats", {
)
)
)
diffAbundOutput <- microbiomeComputations::differentialAbundance(getCollection(mbioDataset, "16S (V4) Genus"), comparatorVariable, method='Maaslin2', verbose=FALSE)
diffAbundOutput <- microbiomeComputations::internalDiffAbund(getCollection(mbioDataset, "16S (V4) Genus"), comparatorVariable, method='Maaslin2', verbose=FALSE)
expect_equal(inherits(diffAbundOutput, "ComputeResult"), TRUE)

correlationOutput <- MicrobiomeDB::selfCorrelation(getCollection(mbioDataset, "16S (V4) Genus"), method='spearman', verbose=FALSE)
Expand Down
42 changes: 33 additions & 9 deletions tests/testthat/test-differentialAbundance.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ genusCounts <- AbsoluteAbundanceData(
ancestorIdColumns = genus@ancestorIdColumns
)

## TODO tests for assignToBinaryGroups and buildBinaryComparator directly?

test_that("differentialAbundance wrapper works", {
# using a variable thats already binary works
diffAbundOutput <- MicrobiomeDB::differentialAbundance(genus, "delivery_mode", method='Maaslin2', verbose=FALSE)
Expand Down Expand Up @@ -51,6 +49,32 @@ test_that("differentialAbundance wrapper works", {
expect_equal(inherits(diffAbundOutput@statistics@statistics, "data.frame"), TRUE)
expect_equal(nrow(diffAbundOutput@statistics@statistics) > 0, TRUE)

# character vector of values works for categorical vars
diffAbundOutput <- MicrobiomeDB::differentialAbundance(
genus,
"country",
groupA = "Russia",
groupB = c("Finland", "Estonia"),
method='Maaslin2',
verbose=FALSE
)
expect_equal(inherits(diffAbundOutput, "ComputeResult"), TRUE)
expect_equal(inherits(diffAbundOutput@statistics, "DifferentialAbundanceResult"), TRUE)
expect_equal(inherits(diffAbundOutput@statistics@statistics, "data.frame"), TRUE)
expect_equal(nrow(diffAbundOutput@statistics@statistics) > 0, TRUE)

# character vector of values works for continuous vars errs
expect_error(
diffAbundOutput <- MicrobiomeDB::differentialAbundance(
genus,
"breastfed_duration_days",
groupA = "100",
groupB = "300",
method='Maaslin2',
verbose=FALSE
)
)

# excluding some values from both predicates works
diffAbundOutput <- MicrobiomeDB::differentialAbundance(
genus,
Expand Down Expand Up @@ -93,13 +117,13 @@ test_that("differentialAbundance wrapper works", {
# passing overlapping groupA and groupB fails
expect_error(
diffAbundOutput <- MicrobiomeDB::differentialAbundance(
genus,
"breastfed_duration_days",
groupA = function(x) {x<300},
groupB = function(x) {x>=100},
method='Maaslin2',
verbose=FALSE
)
genus,
"breastfed_duration_days",
groupA = function(x) {x<300},
groupB = function(x) {x>=100},
method='Maaslin2',
verbose=FALSE
)
)

# passing only groupA, but where groupA is TRUE for all values fails
Expand Down

0 comments on commit 59595b2

Please sign in to comment.