diff --git a/R/amp_subset_samples.R b/R/amp_subset_samples.R index 204bfb19..bdea4473 100644 --- a/R/amp_subset_samples.R +++ b/R/amp_subset_samples.R @@ -5,9 +5,10 @@ #' @usage amp_subset_samples(data, ...) #' #' @param data (\emph{required}) Data list as loaded with \code{\link{amp_load}}. -#' @param minreads Minimum number of reads pr. sample. (\emph{default:} \code{1}) #' @param ... Logical expression indicating elements or rows to keep in the metadata. Missing values are taken as false. Directly passed to \code{subset()}. +#' @param minreads Minimum number of reads pr. sample. (\emph{default:} \code{1}) #' @param normalise (\emph{logical}) Normalise the read abundances to the total amount of reads (percentages) \emph{BEFORE} the subset. (\emph{default:} \code{FALSE}) +#' @param removeAbsents (\emph{logical}) Whether to remove OTU's that may have 0 read abundance in all samples after the subset. (\emph{default:} \code{TRUE}) #' #' @return A list with 3 dataframes (4 if reference sequences are provided). #' @import dplyr @@ -63,7 +64,7 @@ #' @author Mads Albertsen \email{MadsAlbertsen85@@gmail.com} -amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE) { +amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE, removeAbsents = TRUE) { ### Data must be in ampvis2 format if(class(data) != "ampvis2") @@ -85,8 +86,11 @@ amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE) { #remove samples below minreads BEFORE percentages data$abund <- data$abund[, colSums(data$abund) >= minreads, drop = FALSE] + #Subset the metadata again to match any removed sample(s) + data$metadata <- data$metadata[which(rownames(data$metadata) %in% colnames(data$abund)), , drop = FALSE] + ### calculate percentages - if (normalise) { + if (normalise == TRUE) { data$abund <- apply(data$abund,2, function(x) 100*x/sum(x)) %>% as.data.frame() } @@ -95,16 +99,15 @@ amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE) { data$metadata <- droplevels(data$metadata) #Drop unused factor levels or fx heatmaps will show a "NA" column #And only keep columns in otutable that match the rows in the subsetted metadata - data$abund <- data$abund[, rownames(data$metadata), drop = FALSE] + data$abund <- data$abund[, which(colnames(data$abund) %in% rownames(data$metadata)), drop = FALSE] - #After subsetting the samples, remove OTU's that could possibly have 0 reads in all samples - data$abund <- data$abund[rowSums(data$abund) > 0, , drop = FALSE] - - #Subset the metadata again to match any removed sample(s) - data$metadata <- data$metadata[colnames(data$abund), , drop = FALSE] + #After subsetting the samples, remove OTU's that may have 0 reads in all samples + if(removeAbsents == TRUE) { + data$abund <- data$abund[rowSums(data$abund) > 0, , drop = FALSE] + } #Subset taxonomy based on abund - data$tax <- data$tax[rownames(data$abund), , drop = FALSE] + data$tax <- data$tax[which(rownames(data$tax) %in% rownames(data$abund)), , drop = FALSE] #Subset refseq, if any, based on abund if(any(names(data) == "refseq")){ @@ -121,7 +124,7 @@ amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE) { nsamplesafter <- nrow(data$metadata) %>% as.numeric() nOTUsafter <- nrow(data$abund) %>% as.numeric() if (nsamplesbefore == nsamplesafter) { - print("0 samples have been filtered.") + message("0 samples have been filtered.") } else { message(paste(nsamplesbefore-nsamplesafter, "samples and", nOTUsbefore-nOTUsafter,"OTUs have been filtered \nBefore:", nsamplesbefore, "samples and", nOTUsbefore, "OTUs\nAfter:", nsamplesafter, "samples and", nOTUsafter, "OTUs")) } diff --git a/README.html b/README.html index c4e5d13c..1c43524d 100644 --- a/README.html +++ b/README.html @@ -119,7 +119,7 @@ -

Travis-CI Build Status

+

Travis-CI Build Status

Tools for visualising amplicon data

ampvis2 is an R-package to conveniently visualise and analyse 16S rRNA amplicon data in different ways.

diff --git a/docs/articles/ampvis2_files/figure-html/unnamed-chunk-13-1.png b/docs/articles/ampvis2_files/figure-html/unnamed-chunk-13-1.png index 4ef33288..fcf08cbb 100644 Binary files a/docs/articles/ampvis2_files/figure-html/unnamed-chunk-13-1.png and b/docs/articles/ampvis2_files/figure-html/unnamed-chunk-13-1.png differ diff --git a/docs/articles/ampvis2_files/figure-html/unnamed-chunk-14-1.png b/docs/articles/ampvis2_files/figure-html/unnamed-chunk-14-1.png index b4e91709..4d334e9c 100644 Binary files a/docs/articles/ampvis2_files/figure-html/unnamed-chunk-14-1.png and b/docs/articles/ampvis2_files/figure-html/unnamed-chunk-14-1.png differ diff --git a/docs/reference/amp_subset_samples.html b/docs/reference/amp_subset_samples.html index 9758bd04..b8d90178 100644 --- a/docs/reference/amp_subset_samples.html +++ b/docs/reference/amp_subset_samples.html @@ -111,6 +111,10 @@

Ar normalise

(logical) Normalise the read abundances to the total amount of reads (percentages) BEFORE the subset. (default: FALSE)

+ + removeAbsents +

(logical) Whether to remove OTU's that may have 0 read abundance in all samples after the subset. (default: TRUE)

+

Value

@@ -187,9 +191,25 @@

Examp #and remove the sample "16SAMP-749". Remove any sample(s) with less than 20000 total reads MiDASsubset2 <- amp_subset_samples(MiDAS, Plant %in% c("Aalborg West", "Aalborg East") & !SampleID %in% c("16SAMP-749"), - minreads = 20000)

#> Error in `[.data.frame`(data$abund, , rownames(data$metadata), drop = FALSE): undefined columns selected
+ minreads = 20000)
#> 525 samples and 6654 OTUs have been filtered +#> Before: 573 samples and 14791 OTUs +#> After: 48 samples and 8137 OTUs
#Summary -MiDASsubset2
#> Error in eval(expr, envir, enclos): object 'MiDASsubset2' not found
+MiDASsubset2
#> ampvis2 object with 4 elements. +#> Summary of OTU table: +#> Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads +#> 48 8137 1437013 20994 38496 29538.5 +#> Avg#Reads +#> 29937.77 +#> +#> Assigned taxonomy: +#> Kingdom Phylum Class Order Family Genus +#> 8137(100%) 7955(97.76%) 7119(87.49%) 6535(80.31%) 5841(71.78%) 4488(55.16%) +#> Species +#> 17(0.21%) +#> +#> Metadata variables: 10 +#> SampleID, Plant, Year, Period, Date, DeltaDays, LIB_ID, Latitude, Longitude, RawData