Merge pull request #132 from olabiyi/DEV_Amplicon_Illumina_NF_conversion

Amplicon Illumina nextflow conversion: deleted here and added organelle filtering
nasa · Nov 15, 2024 · 49ce1aa · 49ce1aa
2 parents 1716f14 + 1dbfe66
commit 49ce1aa
Show file tree

Hide file tree

Showing 7 changed files with 42 additions and 27 deletions.
diff --git a/...icon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/alpha_diversity.R b/...icon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/alpha_diversity.R
@@ -210,10 +210,10 @@ custom_palette <- custom_palette[-c(21:23, grep(pattern = pattern_to_filter,
 
 
 # Required variables
-metadata_file <- here(opt[["metadata-table"]]) 
-features_file <- here(opt[["feature-table"]])
-taxonomy_file <- here(opt[["taxonomy-table"]])
-alpha_diversity_out_dir <- here("alpha_diversity/")
+metadata_file <- opt[["metadata-table"]] 
+features_file <- opt[["feature-table"]]
+taxonomy_file <- opt[["taxonomy-table"]]
+alpha_diversity_out_dir <-"alpha_diversity/"
 if(!dir.exists(alpha_diversity_out_dir)) dir.create(alpha_diversity_out_dir)
 # Metadata group column name to compare
 groups_colname <- opt[["group"]]

diff --git a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/beta_diversity.R b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/beta_diversity.R
@@ -221,10 +221,10 @@ custom_palette <- custom_palette[-c(21:23, grep(pattern = pattern_to_filter,
 # 2. Add rarefaction
 
 # Required variables
-metadata_file <- here(opt[["metadata-table"]])
-features_file <-  here(opt[["feature-table"]]) 
-taxonomy_file <-  here(opt[["taxonomy-table"]])
-beta_diversity_out_dir <- here("beta_diversity/")
+metadata_file <- opt[["metadata-table"]]
+features_file <-  opt[["feature-table"]] 
+taxonomy_file <-  opt[["taxonomy-table"]]
+beta_diversity_out_dir <- "beta_diversity/"
 if(!dir.exists(beta_diversity_out_dir)) dir.create(beta_diversity_out_dir)
 # Metadata group column name to compare
 groups_colname <- opt[["group"]]

diff --git a/...on/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc1.R b/...on/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc1.R
@@ -290,9 +290,9 @@ publication_format <- theme_bw() +
 group <- opt[["group"]]  # "groups"
 samples_column <- opt[["samples-column"]] # "Sample Name"
 threads <- opt[["cpus"]] # 8
-metadata_file <- here(opt[["metadata-table"]])
-taxonomy_file <-  here(opt[["taxonomy-table"]])
-feature_table_file <- here(opt[["feature-table"]]) 
+metadata_file <- opt[["metadata-table"]]
+taxonomy_file <-  opt[["taxonomy-table"]]
+feature_table_file <- opt[["feature-table"]]
 feature <- opt[["feature-type"]]   # "ASV"
 output_prefix <- opt[["output-prefix"]]
 assay_suffix <- opt[["assay-suffix"]]
@@ -301,7 +301,7 @@ assay_suffix <- opt[["assay-suffix"]]
 prevalence_cutoff <- opt[["prevalence-cutoff"]] # 0.15 (15%)
 # sample / library read count cutoff
 library_cutoff <- opt[["library-cutoff"]]  # 100
-diff_abund_out_dir <- here("differential_abundance/")
+diff_abund_out_dir <- "differential_abundance/"
 if(!dir.exists(diff_abund_out_dir)) dir.create(diff_abund_out_dir)
 
 
@@ -339,6 +339,14 @@ print(glue("There are {sum(taxonomy_table$domain == 'Other')} features without
 # Dropping features that couldn't be assigned taxonomy
 taxonomy_table <- taxonomy_table[-which(taxonomy_table$domain == 'Other'),]
 
+# Drop chloroplast / mitochondria (organelle) features; match either letter case
+asvs2drop <- taxonomy_table %>%
+  unite(col="taxonomy",domain:species) %>%
+  filter(str_detect(taxonomy, "[Cc]hloroplast|[Mm]itochondria")) %>%
+  row.names()
+taxonomy_table <- taxonomy_table[!(rownames(taxonomy_table) %in% asvs2drop),]
+
+
 # Get long asv taxonomy names and clean
 species <- taxonomy_table %>%
   unite(species,domain:species,sep = ";") %>% # Generalize this line -------- 

diff --git a/...on/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc2.R b/...on/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc2.R
@@ -346,9 +346,9 @@ publication_format <- theme_bw() +
 group <- opt[["group"]]  # "groups"
 samples_column <- opt[["samples-column"]] # "Sample Name"
 threads <- opt[["cpus"]] # 8
-metadata_file <- here(opt[["metadata-table"]])
-taxonomy_file <-  here(opt[["taxonomy-table"]])
-feature_table_file <- here(opt[["feature-table"]]) 
+metadata_file <- opt[["metadata-table"]]
+taxonomy_file <-  opt[["taxonomy-table"]]
+feature_table_file <- opt[["feature-table"]]
 feature <- opt[["feature-type"]]   # "ASV"
 output_prefix <- opt[["output-prefix"]]
 assay_suffix <- opt[["assay-suffix"]]
@@ -357,7 +357,7 @@ assay_suffix <- opt[["assay-suffix"]]
 prevalence_cutoff <- opt[["prevalence-cutoff"]] # 0.15 (15%)
 # sample / library read count cutoff
 library_cutoff <- opt[["library-cutoff"]]  # 100
-diff_abund_out_dir <- here("differential_abundance/")
+diff_abund_out_dir <- "differential_abundance/"
 if(!dir.exists(diff_abund_out_dir)) dir.create(diff_abund_out_dir)
 
 # ------------------------ Read metadata ---------------------------------- #
@@ -394,6 +394,13 @@ print(glue("There are {sum(taxonomy_table$domain == 'Other')} features without
 # Dropping features that couldn't be assigned taxonomy
 taxonomy_table <- taxonomy_table[-which(taxonomy_table$domain == 'Other'),]
 
+# Drop chloroplast / mitochondria (organelle) features; match either letter case
+asvs2drop <- taxonomy_table %>%
+  unite(col="taxonomy",domain:species) %>%
+  filter(str_detect(taxonomy, "[Cc]hloroplast|[Mm]itochondria")) %>%
+  row.names()
+taxonomy_table <- taxonomy_table[!(rownames(taxonomy_table) %in% asvs2drop),]
+
 # Get long asv taxonomy names and clean
 species <- taxonomy_table %>%
   unite(species,domain:species,sep = ";") %>% # Generalize this line -------- 
@@ -678,4 +685,4 @@ ggsave(filename = glue("{output_prefix}{feature}_boxplots{assay_suffix}.png"), p
 )
 
 
-message("Run completed sucessfully.")
+message("Run completed successfully.")
diff --git a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/plot_taxonomy.R b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/plot_taxonomy.R
@@ -361,10 +361,10 @@ custom_palette <- custom_palette[-c(21:23, grep(pattern = pattern_to_filter,
 
 
 # Required variables
-metadata_file <- here(opt[["metadata-table"]]) 
-features_file <- here(opt[["feature-table"]])
-taxonomy_file <- here(opt[["taxonomy-table"]])
-taxonomy_plots_out_dir <- here("taxonomy_plots/")
+metadata_file <- opt[["metadata-table"]]
+features_file <- opt[["feature-table"]]
+taxonomy_file <- opt[["taxonomy-table"]]
+taxonomy_plots_out_dir <- "taxonomy_plots/"
 if(!dir.exists(taxonomy_plots_out_dir)) dir.create(taxonomy_plots_out_dir)
 # Metadata group column name to compare
 groups_colname <- opt[["group"]]

diff --git a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/run_deseq2.R b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/run_deseq2.R
@@ -130,9 +130,9 @@ library(DESeq2)
 group <- opt[["group"]]  # "groups"
 samples_column <- opt[["samples-column"]] # "Sample Name"
 threads <- opt[["cpus"]] # 8
-metadata_file <- here(opt[["metadata-table"]])
-taxonomy_file <-  here(opt[["taxonomy-table"]])
-feature_table_file <- here(opt[["feature-table"]]) 
+metadata_file <- opt[["metadata-table"]]
+taxonomy_file <-  opt[["taxonomy-table"]]
+feature_table_file <- opt[["feature-table"]]
 feature <- opt[["feature-type"]]   # "ASV"
 output_prefix <- opt[["output-prefix"]]
 assay_suffix <- opt[["assay-suffix"]]
@@ -141,7 +141,7 @@ assay_suffix <- opt[["assay-suffix"]]
 prevalence_cutoff <- opt[["prevalence-cutoff"]] # 0.15 (15%)
 # sample / library read count cutoff
 library_cutoff <- opt[["library-cutoff"]]  # 100
-diff_abund_out_dir <- here("differential_abundance/")
+diff_abund_out_dir <- "differential_abundance/"
 if(!dir.exists(diff_abund_out_dir)) dir.create(diff_abund_out_dir)
 
 

diff --git a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/main.nf b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/main.nf
@@ -26,7 +26,7 @@ if (params.help) {
   println("Required arguments:")
   println("""-profile [STRING] What profile should be used to run the workflow. Options are [singularity, docker, conda, slurm].
 	         singularity, docker and conda will run the pipelne locally using singularity, docker, and conda, respectively.
-                 To combnine profiles, pass them together separated by comma. For example, to run jobs using slurm in singularity containers use 'slurm,singularity' . """)			 
+                 To combine profiles, pass them together separated by comma. For example, to run jobs using slurm in singularity containers use 'slurm,singularity' . """)			 
   println("--input_file  [PATH] A 4-column (single-end) or 5-column (paired-end) input file (sample_id, forward, [reverse,] paired, groups). Mandatory if a GLDS accession is not provided.")
   println(" Please see the files: SE_file.csv and PE_file.csv for single-end and paired-end examples, respectively.")
   println(" The sample_id column should contain unique sample ids.")
@@ -66,7 +66,7 @@ if (params.help) {
   println()
   println("Diversity and Differential abundance testing parameters:")
   println("         --diff_abund_method [STRING] The method to use for differential abundance testing. Either ['ancombc1', 'ancombc2', or 'deseq2'] respectively. Default: 'ancombc2' ")
-  println("         --rarefaction_depth [STRING] The Minimum desired sample rarefaction depth for diversity analysis. Default: 500.")
+  println("         --rarefaction_depth [INTEGER] The Minimum desired sample rarefaction depth for diversity analysis. Default: 500.")
   println("         --group [STRING] Column in input csv file with treatments to be compared. Default: 'groups' ")
   println("         --samples_column [STRING] Column in input csv file with sample names belonging to each treatment group. Default: 'sample_id' ")
   println()