From 57b5bc08202ef6d7968924b564c761418e975ad0 Mon Sep 17 00:00:00 2001 From: olabiyi Date: Fri, 15 Nov 2024 09:29:41 -0800 Subject: [PATCH 1/2] Added chloroplast and mitochondria filtering --- .../workflow_code/bin/pairwise_ancombc1.R | 8 ++++++++ .../workflow_code/bin/pairwise_ancombc2.R | 9 ++++++++- .../NF_AmpIllumina-B/workflow_code/main.nf | 4 ++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc1.R b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc1.R index d76c1c77..4fcc5e45 100644 --- a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc1.R +++ b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc1.R @@ -339,6 +339,14 @@ print(glue("There are {sum(taxonomy_table$domain == 'Other')} features without # Dropping features that couldn't be assigned taxonomy taxonomy_table <- taxonomy_table[-which(taxonomy_table$domain == 'Other'),] +# Removing Chloroplast and Mitochondria Organelle DNA contamination +asvs2drop <- taxonomy_table %>% + unite(col="taxonomy",domain:species) %>% + filter(str_detect(taxonomy, "[Cc]hloroplast|[Mn]itochondria")) %>% + row.names() +taxonomy_table <- taxonomy_table[!(rownames(taxonomy_table) %in% asvs2drop),] + + # Get long asv taxonomy names and clean species <- taxonomy_table %>% unite(species,domain:species,sep = ";") %>% # Generalize this line -------- diff --git a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc2.R b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc2.R index 3a7ae153..dbf3dd25 100644 --- a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc2.R +++ b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc2.R @@ -394,6 +394,13 @@ print(glue("There are {sum(taxonomy_table$domain == 'Other')} features without # Dropping features that couldn't be assigned taxonomy taxonomy_table <- taxonomy_table[-which(taxonomy_table$domain == 'Other'),] +# Removing Chloroplast and Mitochondria Organelle DNA contamination +asvs2drop <- taxonomy_table %>% + unite(col="taxonomy",domain:species) %>% + filter(str_detect(taxonomy, "[Cc]hloroplast|[Mn]itochondria")) %>% + row.names() +taxonomy_table <- taxonomy_table[!(rownames(taxonomy_table) %in% asvs2drop),] + # Get long asv taxonomy names and clean species <- taxonomy_table %>% unite(species,domain:species,sep = ";") %>% # Generalize this line -------- @@ -678,4 +685,4 @@ ggsave(filename = glue("{output_prefix}{feature}_boxplots{assay_suffix}.png"), p ) -message("Run completed sucessfully.") \ No newline at end of file +message("Run completed sucessfully.") diff --git a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/main.nf b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/main.nf index 0c09019a..06df549b 100644 --- a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/main.nf +++ b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/main.nf @@ -26,7 +26,7 @@ if (params.help) { println("Required arguments:") println("""-profile [STRING] What profile should be used to run the workflow. Options are [singularity, docker, conda, slurm]. singularity, docker and conda will run the pipelne locally using singularity, docker, and conda, respectively. - To combnine profiles, pass them together separated by comma. For example, to run jobs using slurm in singularity containers use 'slurm,singularity' . """) + To combine profiles, pass them together separated by comma. For example, to run jobs using slurm in singularity containers use 'slurm,singularity' . """) println("--input_file [PATH] A 4-column (single-end) or 5-column (paired-end) input file (sample_id, forward, [reverse,] paired, groups). Mandatory if a GLDS accession is not provided.") println(" Please see the files: SE_file.csv and PE_file.csv for single-end and paired-end examples, respectively.") println(" The sample_id column should contain unique sample ids.") @@ -66,7 +66,7 @@ if (params.help) { println() println("Diversity and Differential abundance testing parameters:") println(" --diff_abund_method [STRING] The method to use for differential abundance testing. Either ['ancombc1', 'ancombc2', or 'deseq2'] respectively. Default: 'ancombc2' ") - println(" --rarefaction_depth [STRING] The Minimum desired sample rarefaction depth for diversity analysis. Default: 500.") + println(" --rarefaction_depth [INTEGER] The Minimum desired sample rarefaction depth for diversity analysis. Default: 500.") println(" --group [STRING] Column in input csv file with treatments to be compared. Default: 'groups' ") println(" --samples_column [STRING] Column in input csv file with sample names belonging to each treatment group. Default: 'sample_id' ") println() From 1dbfe6699942d8cb4d82f5d2c9c9c7945205d75a Mon Sep 17 00:00:00 2001 From: olabiyi Date: Fri, 15 Nov 2024 11:28:38 -0800 Subject: [PATCH 2/2] Deleted R here package usage --- .../NF_AmpIllumina-B/workflow_code/bin/alpha_diversity.R | 8 ++++---- .../NF_AmpIllumina-B/workflow_code/bin/beta_diversity.R | 8 ++++---- .../workflow_code/bin/pairwise_ancombc1.R | 8 ++++---- .../workflow_code/bin/pairwise_ancombc2.R | 8 ++++---- .../NF_AmpIllumina-B/workflow_code/bin/plot_taxonomy.R | 8 ++++---- .../NF_AmpIllumina-B/workflow_code/bin/run_deseq2.R | 8 ++++---- 6 files changed, 24 insertions(+), 24 deletions(-) diff --git a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/alpha_diversity.R b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/alpha_diversity.R index 9688eea1..7fce672c 100755 --- a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/alpha_diversity.R +++ b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/alpha_diversity.R @@ -210,10 +210,10 @@ custom_palette <- custom_palette[-c(21:23, grep(pattern = pattern_to_filter, # Required variables -metadata_file <- here(opt[["metadata-table"]]) -features_file <- here(opt[["feature-table"]]) -taxonomy_file <- here(opt[["taxonomy-table"]]) -alpha_diversity_out_dir <- here("alpha_diversity/") +metadata_file <- opt[["metadata-table"]] +features_file <- opt[["feature-table"]] +taxonomy_file <- opt[["taxonomy-table"]] +alpha_diversity_out_dir <-"alpha_diversity/" if(!dir.exists(alpha_diversity_out_dir)) dir.create(alpha_diversity_out_dir) # Metadata group column name to compare groups_colname <- opt[["group"]] diff --git a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/beta_diversity.R b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/beta_diversity.R index d7a0e15c..53ce4128 100755 --- a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/beta_diversity.R +++ b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/beta_diversity.R @@ -221,10 +221,10 @@ custom_palette <- custom_palette[-c(21:23, grep(pattern = pattern_to_filter, # 2. Add rarefaction # Required variables -metadata_file <- here(opt[["metadata-table"]]) -features_file <- here(opt[["feature-table"]]) -taxonomy_file <- here(opt[["taxonomy-table"]]) -beta_diversity_out_dir <- here("beta_diversity/") +metadata_file <- opt[["metadata-table"]] +features_file <- opt[["feature-table"]] +taxonomy_file <- opt[["taxonomy-table"]] +beta_diversity_out_dir <- "beta_diversity/" if(!dir.exists(beta_diversity_out_dir)) dir.create(beta_diversity_out_dir) # Metadata group column name to compare groups_colname <- opt[["group"]] diff --git a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc1.R b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc1.R index 4fcc5e45..e136bd8e 100644 --- a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc1.R +++ b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc1.R @@ -290,9 +290,9 @@ publication_format <- theme_bw() + group <- opt[["group"]] # "groups" samples_column <- opt[["samples-column"]] # "Sample Name" threads <- opt[["cpus"]] # 8 -metadata_file <- here(opt[["metadata-table"]]) -taxonomy_file <- here(opt[["taxonomy-table"]]) -feature_table_file <- here(opt[["feature-table"]]) +metadata_file <- opt[["metadata-table"]] +taxonomy_file <- opt[["taxonomy-table"]] +feature_table_file <- opt[["feature-table"]] feature <- opt[["feature-type"]] # "ASV" output_prefix <- opt[["output-prefix"]] assay_suffix <- opt[["assay-suffix"]] @@ -301,7 +301,7 @@ assay_suffix <- opt[["assay-suffix"]] prevalence_cutoff <- opt[["prevalence-cutoff"]] # 0.15 (15%) # sample / library read count cutoff library_cutoff <- opt[["library-cutoff"]] # 100 -diff_abund_out_dir <- here("differential_abundance/") +diff_abund_out_dir <- "differential_abundance/" if(!dir.exists(diff_abund_out_dir)) dir.create(diff_abund_out_dir) diff --git a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc2.R b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc2.R index dbf3dd25..227f8fb2 100644 --- a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc2.R +++ b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/pairwise_ancombc2.R @@ -346,9 +346,9 @@ publication_format <- theme_bw() + group <- opt[["group"]] # "groups" samples_column <- opt[["samples-column"]] # "Sample Name" threads <- opt[["cpus"]] # 8 -metadata_file <- here(opt[["metadata-table"]]) -taxonomy_file <- here(opt[["taxonomy-table"]]) -feature_table_file <- here(opt[["feature-table"]]) +metadata_file <- opt[["metadata-table"]] +taxonomy_file <- opt[["taxonomy-table"]] +feature_table_file <- opt[["feature-table"]] feature <- opt[["feature-type"]] # "ASV" output_prefix <- opt[["output-prefix"]] assay_suffix <- opt[["assay-suffix"]] @@ -357,7 +357,7 @@ assay_suffix <- opt[["assay-suffix"]] prevalence_cutoff <- opt[["prevalence-cutoff"]] # 0.15 (15%) # sample / library read count cutoff library_cutoff <- opt[["library-cutoff"]] # 100 -diff_abund_out_dir <- here("differential_abundance/") +diff_abund_out_dir <- "differential_abundance/" if(!dir.exists(diff_abund_out_dir)) dir.create(diff_abund_out_dir) # ------------------------ Read metadata ---------------------------------- # diff --git a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/plot_taxonomy.R b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/plot_taxonomy.R index 8f5051d2..717950cd 100755 --- a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/plot_taxonomy.R +++ b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/plot_taxonomy.R @@ -361,10 +361,10 @@ custom_palette <- custom_palette[-c(21:23, grep(pattern = pattern_to_filter, # Required variables -metadata_file <- here(opt[["metadata-table"]]) -features_file <- here(opt[["feature-table"]]) -taxonomy_file <- here(opt[["taxonomy-table"]]) -taxonomy_plots_out_dir <- here("taxonomy_plots/") +metadata_file <- opt[["metadata-table"]] +features_file <- opt[["feature-table"]] +taxonomy_file <- opt[["taxonomy-table"]] +taxonomy_plots_out_dir <- "taxonomy_plots/" if(!dir.exists(taxonomy_plots_out_dir)) dir.create(taxonomy_plots_out_dir) # Metadata group column name to compare groups_colname <- opt[["group"]] diff --git a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/run_deseq2.R b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/run_deseq2.R index e63a555d..d534ab18 100755 --- a/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/run_deseq2.R +++ b/Amplicon/Illumina/Workflow_Documentation/NF_AmpIllumina-B/workflow_code/bin/run_deseq2.R @@ -130,9 +130,9 @@ library(DESeq2) group <- opt[["group"]] # "groups" samples_column <- opt[["samples-column"]] # "Sample Name" threads <- opt[["cpus"]] # 8 -metadata_file <- here(opt[["metadata-table"]]) -taxonomy_file <- here(opt[["taxonomy-table"]]) -feature_table_file <- here(opt[["feature-table"]]) +metadata_file <- opt[["metadata-table"]] +taxonomy_file <- opt[["taxonomy-table"]] +feature_table_file <- opt[["feature-table"]] feature <- opt[["feature-type"]] # "ASV" output_prefix <- opt[["output-prefix"]] assay_suffix <- opt[["assay-suffix"]] @@ -141,7 +141,7 @@ assay_suffix <- opt[["assay-suffix"]] prevalence_cutoff <- opt[["prevalence-cutoff"]] # 0.15 (15%) # sample / library read count cutoff library_cutoff <- opt[["library-cutoff"]] # 100 -diff_abund_out_dir <- here("differential_abundance/") +diff_abund_out_dir <- "differential_abundance/" if(!dir.exists(diff_abund_out_dir)) dir.create(diff_abund_out_dir)