Added option for removing OTUs that are absent from all samples after a subset

KasperSkytte · Oct 26, 2017 · 0b367a4 · 0b367a4
1 parent cbde6b9
commit 0b367a4
Show file tree

Hide file tree

Showing 6 changed files with 39 additions and 14 deletions.
diff --git a/R/amp_subset_samples.R b/R/amp_subset_samples.R
@@ -5,9 +5,10 @@
 #' @usage amp_subset_samples(data, ...)
 #'
 #' @param data (\emph{required}) Data list as loaded with \code{\link{amp_load}}.
-#' @param minreads Minimum number of reads pr. sample. (\emph{default:} \code{1})
 #' @param ... Logical expression indicating elements or rows to keep in the metadata. Missing values are taken as false. Directly passed to \code{subset()}. 
+#' @param minreads Minimum number of reads per sample. (\emph{default:} \code{1})
 #' @param normalise (\emph{logical}) Normalise the read abundances to the total amount of reads (percentages) \emph{BEFORE} the subset. (\emph{default:} \code{FALSE})
+#' @param removeAbsents (\emph{logical}) Whether to remove OTUs that have 0 read abundance in all samples after the subset. (\emph{default:} \code{TRUE})
 #' 
 #' @return A list with 3 dataframes (4 if reference sequences are provided).
 #' @import dplyr
@@ -63,7 +64,7 @@
 #' @author Mads Albertsen \email{MadsAlbertsen85@@gmail.com}
 
 
-amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE) {
+amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE, removeAbsents = TRUE) {
 
   ### Data must be in ampvis2 format
   if(class(data) != "ampvis2")
@@ -85,8 +86,11 @@ amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE) {
   #remove samples below minreads BEFORE percentages
   data$abund <- data$abund[, colSums(data$abund) >= minreads, drop = FALSE]
 
+  #Subset the metadata again to match any removed sample(s)
+  data$metadata <- data$metadata[which(rownames(data$metadata) %in% colnames(data$abund)), , drop = FALSE]
+
   ### calculate percentages 
-  if (normalise) {
+  if (normalise == TRUE) {
     data$abund <- apply(data$abund,2, function(x) 100*x/sum(x)) %>% as.data.frame() 
   }
 
@@ -95,16 +99,15 @@ amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE) {
   data$metadata <- droplevels(data$metadata) #Drop unused factor levels or fx heatmaps will show a "NA" column
 
   #And only keep columns in otutable that match the rows in the subsetted metadata
-  data$abund <- data$abund[, rownames(data$metadata), drop = FALSE]
+  data$abund <- data$abund[, which(colnames(data$abund) %in% rownames(data$metadata)), drop = FALSE]
 
-  #After subsetting the samples, remove OTU's that could possibly have 0 reads in all samples
-  data$abund <- data$abund[rowSums(data$abund) > 0, , drop = FALSE]
-
-  #Subset the metadata again to match any removed sample(s)
-  data$metadata <- data$metadata[colnames(data$abund), , drop = FALSE]
+  #After subsetting the samples, remove OTU's that may have 0 reads in all samples
+  if(removeAbsents == TRUE) {
+    data$abund <- data$abund[rowSums(data$abund) > 0, , drop = FALSE]
+  }
 
   #Subset taxonomy based on abund
-  data$tax <- data$tax[rownames(data$abund), , drop = FALSE]
+  data$tax <- data$tax[which(rownames(data$tax) %in% rownames(data$abund)), , drop = FALSE]
 
   #Subset refseq, if any, based on abund
   if(any(names(data) == "refseq")){
@@ -121,7 +124,7 @@ amp_subset_samples <- function(data, ..., minreads = 1, normalise = FALSE) {
   nsamplesafter <- nrow(data$metadata) %>% as.numeric()
   nOTUsafter <- nrow(data$abund) %>% as.numeric()
   if (nsamplesbefore == nsamplesafter) {
-    print("0 samples have been filtered.")
+    message("0 samples have been filtered.")
   } else {
     message(paste(nsamplesbefore-nsamplesafter, "samples and", nOTUsbefore-nOTUsafter,"OTUs have been filtered \nBefore:", nsamplesbefore, "samples and", nOTUsbefore, "OTUs\nAfter:", nsamplesafter, "samples and", nOTUsafter, "OTUs"))
   }

diff --git a/README.html b/README.html
@@ -119,7 +119,7 @@
 </div>
 
 
-<p><a href="https://travis-ci.org/MadsAlbertsen/ampvis2"><img src="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI5MCIgaGVpZ2h0PSIyMCI+PGxpbmVhckdyYWRpZW50IGlkPSJhIiB4Mj0iMCIgeTI9IjEwMCUiPjxzdG9wIG9mZnNldD0iMCIgc3RvcC1jb2xvcj0iI2JiYiIgc3RvcC1vcGFjaXR5PSIuMSIvPjxzdG9wIG9mZnNldD0iMSIgc3RvcC1vcGFjaXR5PSIuMSIvPjwvbGluZWFyR3JhZGllbnQ+PHJlY3Qgcng9IjMiIHdpZHRoPSI5MCIgaGVpZ2h0PSIyMCIgZmlsbD0iIzU1NSIvPjxyZWN0IHJ4PSIzIiB4PSIzNyIgd2lkdGg9IjUzIiBoZWlnaHQ9IjIwIiBmaWxsPSIjNGMxIi8+PHBhdGggZmlsbD0iIzRjMSIgZD0iTTM3IDBoNHYyMGgtNHoiLz48cmVjdCByeD0iMyIgd2lkdGg9IjkwIiBoZWlnaHQ9IjIwIiBmaWxsPSJ1cmwoI2EpIi8+PGcgZmlsbD0iI2ZmZiIgdGV4dC1hbmNob3I9Im1pZGRsZSIgZm9udC1mYW1pbHk9IkRlamFWdSBTYW5zLFZlcmRhbmEsR2VuZXZhLHNhbnMtc2VyaWYiIGZvbnQtc2l6ZT0iMTEiPjx0ZXh0IHg9IjE5LjUiIHk9IjE1IiBmaWxsPSIjMDEwMTAxIiBmaWxsLW9wYWNpdHk9Ii4zIj5idWlsZDwvdGV4dD48dGV4dCB4PSIxOS41IiB5PSIxNCI+YnVpbGQ8L3RleHQ+PHRleHQgeD0iNjIuNSIgeT0iMTUiIGZpbGw9IiMwMTAxMDEiIGZpbGwtb3BhY2l0eT0iLjMiPnBhc3Npbmc8L3RleHQ+PHRleHQgeD0iNjIuNSIgeT0iMTQiPnBhc3Npbmc8L3RleHQ+PC9nPjwvc3ZnPg==" alt="Travis-CI Build Status" /></a></p>
+<p><a href="https://travis-ci.org/MadsAlbertsen/ampvis2"><img src="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI4MSIgaGVpZ2h0PSIyMCI+PGxpbmVhckdyYWRpZW50IGlkPSJhIiB4Mj0iMCIgeTI9IjEwMCUiPjxzdG9wIG9mZnNldD0iMCIgc3RvcC1jb2xvcj0iI2JiYiIgc3RvcC1vcGFjaXR5PSIuMSIvPjxzdG9wIG9mZnNldD0iMSIgc3RvcC1vcGFjaXR5PSIuMSIvPjwvbGluZWFyR3JhZGllbnQ+PHJlY3Qgcng9IjMiIHdpZHRoPSI4MSIgaGVpZ2h0PSIyMCIgZmlsbD0iIzU1NSIvPjxyZWN0IHJ4PSIzIiB4PSIzNyIgd2lkdGg9IjQ0IiBoZWlnaHQ9IjIwIiBmaWxsPSIjZTA1ZDQ0Ii8+PHBhdGggZmlsbD0iI2UwNWQ0NCIgZD0iTTM3IDBoNHYyMGgtNHoiLz48cmVjdCByeD0iMyIgd2lkdGg9IjgxIiBoZWlnaHQ9IjIwIiBmaWxsPSJ1cmwoI2EpIi8+PGcgZmlsbD0iI2ZmZiIgdGV4dC1hbmNob3I9Im1pZGRsZSIgZm9udC1mYW1pbHk9IkRlamFWdSBTYW5zLFZlcmRhbmEsR2VuZXZhLHNhbnMtc2VyaWYiIGZvbnQtc2l6ZT0iMTEiPjx0ZXh0IHg9IjE5LjUiIHk9IjE1IiBmaWxsPSIjMDEwMTAxIiBmaWxsLW9wYWNpdHk9Ii4zIj5idWlsZDwvdGV4dD48dGV4dCB4PSIxOS41IiB5PSIxNCI+YnVpbGQ8L3RleHQ+PHRleHQgeD0iNTgiIHk9IjE1IiBmaWxsPSIjMDEwMTAxIiBmaWxsLW9wYWNpdHk9Ii4zIj5mYWlsaW5nPC90ZXh0Pjx0ZXh0IHg9IjU4IiB5PSIxNCI+ZmFpbGluZzwvdGV4dD48L2c+PC9zdmc+" alt="Travis-CI Build Status" /></a></p>
 <div id="tools-for-visualising-amplicon-data" class="section level1">
 <h1>Tools for visualising amplicon data</h1>
 <p>ampvis2 is an R-package to conveniently visualise and analyse 16S rRNA amplicon data in different ways.</p>

diff --git a/docs/articles/ampvis2_files/figure-html/unnamed-chunk-13-1.png b/docs/articles/ampvis2_files/figure-html/unnamed-chunk-13-1.png
diff --git a/docs/articles/ampvis2_files/figure-html/unnamed-chunk-14-1.png b/docs/articles/ampvis2_files/figure-html/unnamed-chunk-14-1.png
diff --git a/docs/reference/amp_subset_samples.html b/docs/reference/amp_subset_samples.html
diff --git a/man/amp_subset_samples.Rd b/man/amp_subset_samples.Rd