diff --git a/R/amp_subset_samples.R b/R/amp_subset_samples.R index 204bfb19..bdea4473 100644 --- a/R/amp_subset_samples.R +++ b/R/amp_subset_samples.R @@ -5,9 +5,10 @@ #' @usage amp_subset_samples(data, ...) #' #' @param data (\emph{required}) Data list as loaded with \code{\link{amp_load}}. -#' @param minreads Minimum number of reads pr. sample. (\emph{default:} \code{1}) #' @param ... Logical expression indicating elements or rows to keep in the metadata. Missing values are taken as false. Directly passed to \code{subset()}. +#' @param minreads Minimum number of reads pr. sample. (\emph{default:} \code{1}) #' @param normalise (\emph{logical}) Normalise the read abundances to the total amount of reads (percentages) \emph{BEFORE} the subset. (\emph{default:} \code{FALSE}) +#' @param removeAbsents (\emph{logical}) Whether to remove OTU's that may have 0 read abundance in all samples after the subset. (\emph{default:} \code{TRUE}) #' #' @return A list with 3 dataframes (4 if reference sequences are provided). #' @import dplyr @@ -63,7 +64,7 @@ #' @author Mads Albertsen \email{MadsAlbertsen85@@gmail.com} -amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE) { +amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE, removeAbsents = TRUE) { ### Data must be in ampvis2 format if(class(data) != "ampvis2") @@ -85,8 +86,11 @@ amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE) { #remove samples below minreads BEFORE percentages data$abund <- data$abund[, colSums(data$abund) >= minreads, drop = FALSE] + #Subset the metadata again to match any removed sample(s) + data$metadata <- data$metadata[which(rownames(data$metadata) %in% colnames(data$abund)), , drop = FALSE] + ### calculate percentages - if (normalise) { + if (normalise == TRUE) { data$abund <- apply(data$abund,2, function(x) 100*x/sum(x)) %>% as.data.frame() } @@ -95,16 +99,15 @@ amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE) { data$metadata <- droplevels(data$metadata) #Drop unused factor levels or fx heatmaps will show a "NA" column #And only keep columns in otutable that match the rows in the subsetted metadata - data$abund <- data$abund[, rownames(data$metadata), drop = FALSE] + data$abund <- data$abund[, which(colnames(data$abund) %in% rownames(data$metadata)), drop = FALSE] - #After subsetting the samples, remove OTU's that could possibly have 0 reads in all samples - data$abund <- data$abund[rowSums(data$abund) > 0, , drop = FALSE] - - #Subset the metadata again to match any removed sample(s) - data$metadata <- data$metadata[colnames(data$abund), , drop = FALSE] + #After subsetting the samples, remove OTU's that may have 0 reads in all samples + if(removeAbsents == TRUE) { + data$abund <- data$abund[rowSums(data$abund) > 0, , drop = FALSE] + } #Subset taxonomy based on abund - data$tax <- data$tax[rownames(data$abund), , drop = FALSE] + data$tax <- data$tax[which(rownames(data$tax) %in% rownames(data$abund)), , drop = FALSE] #Subset refseq, if any, based on abund if(any(names(data) == "refseq")){ @@ -121,7 +124,7 @@ amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE) { nsamplesafter <- nrow(data$metadata) %>% as.numeric() nOTUsafter <- nrow(data$abund) %>% as.numeric() if (nsamplesbefore == nsamplesafter) { - print("0 samples have been filtered.") + message("0 samples have been filtered.") } else { message(paste(nsamplesbefore-nsamplesafter, "samples and", nOTUsbefore-nOTUsafter,"OTUs have been filtered \nBefore:", nsamplesbefore, "samples and", nOTUsbefore, "OTUs\nAfter:", nsamplesafter, "samples and", nOTUsafter, "OTUs")) } diff --git a/README.html b/README.html index c4e5d13c..1c43524d 100644 --- a/README.html +++ b/README.html @@ -119,7 +119,7 @@ -
+ampvis2 is an R-package to conveniently visualise and analyse 16S rRNA amplicon data in different ways.
diff --git a/docs/articles/ampvis2_files/figure-html/unnamed-chunk-13-1.png b/docs/articles/ampvis2_files/figure-html/unnamed-chunk-13-1.png index 4ef33288..fcf08cbb 100644 Binary files a/docs/articles/ampvis2_files/figure-html/unnamed-chunk-13-1.png and b/docs/articles/ampvis2_files/figure-html/unnamed-chunk-13-1.png differ diff --git a/docs/articles/ampvis2_files/figure-html/unnamed-chunk-14-1.png b/docs/articles/ampvis2_files/figure-html/unnamed-chunk-14-1.png index b4e91709..4d334e9c 100644 Binary files a/docs/articles/ampvis2_files/figure-html/unnamed-chunk-14-1.png and b/docs/articles/ampvis2_files/figure-html/unnamed-chunk-14-1.png differ diff --git a/docs/reference/amp_subset_samples.html b/docs/reference/amp_subset_samples.html index 9758bd04..b8d90178 100644 --- a/docs/reference/amp_subset_samples.html +++ b/docs/reference/amp_subset_samples.html @@ -111,6 +111,10 @@(logical) Normalise the read abundances to the total amount of reads (percentages) BEFORE the subset. (default: FALSE
)
(logical) Whether to remove OTU's that may have 0 read abundance in all samples after the subset. (default: TRUE
)