Skip to content

Commit

Permalink
Merge pull request #132 from olabiyi/DEV_Amplicon_Illumina_NF_conversion
Browse files Browse the repository at this point in the history
Amplicon Illumina nextflow conversion: deleted here and added organelle filtering
  • Loading branch information
asaravia-butler authored Nov 15, 2024
2 parents 1716f14 + 1dbfe66 commit 49ce1aa
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,10 @@ custom_palette <- custom_palette[-c(21:23, grep(pattern = pattern_to_filter,


# Required variables
metadata_file <- here(opt[["metadata-table"]])
features_file <- here(opt[["feature-table"]])
taxonomy_file <- here(opt[["taxonomy-table"]])
alpha_diversity_out_dir <- here("alpha_diversity/")
metadata_file <- opt[["metadata-table"]]
features_file <- opt[["feature-table"]]
taxonomy_file <- opt[["taxonomy-table"]]
alpha_diversity_out_dir <-"alpha_diversity/"
if(!dir.exists(alpha_diversity_out_dir)) dir.create(alpha_diversity_out_dir)
# Metadata group column name to compare
groups_colname <- opt[["group"]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,10 @@ custom_palette <- custom_palette[-c(21:23, grep(pattern = pattern_to_filter,
# 2. Add rarefaction

# Required variables
metadata_file <- here(opt[["metadata-table"]])
features_file <- here(opt[["feature-table"]])
taxonomy_file <- here(opt[["taxonomy-table"]])
beta_diversity_out_dir <- here("beta_diversity/")
metadata_file <- opt[["metadata-table"]]
features_file <- opt[["feature-table"]]
taxonomy_file <- opt[["taxonomy-table"]]
beta_diversity_out_dir <- "beta_diversity/"
if(!dir.exists(beta_diversity_out_dir)) dir.create(beta_diversity_out_dir)
# Metadata group column name to compare
groups_colname <- opt[["group"]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,9 +290,9 @@ publication_format <- theme_bw() +
group <- opt[["group"]] # "groups"
samples_column <- opt[["samples-column"]] # "Sample Name"
threads <- opt[["cpus"]] # 8
metadata_file <- here(opt[["metadata-table"]])
taxonomy_file <- here(opt[["taxonomy-table"]])
feature_table_file <- here(opt[["feature-table"]])
metadata_file <- opt[["metadata-table"]]
taxonomy_file <- opt[["taxonomy-table"]]
feature_table_file <- opt[["feature-table"]]
feature <- opt[["feature-type"]] # "ASV"
output_prefix <- opt[["output-prefix"]]
assay_suffix <- opt[["assay-suffix"]]
Expand All @@ -301,7 +301,7 @@ assay_suffix <- opt[["assay-suffix"]]
prevalence_cutoff <- opt[["prevalence-cutoff"]] # 0.15 (15%)
# sample / library read count cutoff
library_cutoff <- opt[["library-cutoff"]] # 100
diff_abund_out_dir <- here("differential_abundance/")
diff_abund_out_dir <- "differential_abundance/"
if(!dir.exists(diff_abund_out_dir)) dir.create(diff_abund_out_dir)


Expand Down Expand Up @@ -339,6 +339,14 @@ print(glue("There are {sum(taxonomy_table$domain == 'Other')} features without
# Dropping features that couldn't be assigned taxonomy.
# A logical mask is used instead of `-which(...)`: when no feature is labelled
# 'Other', `which()` returns integer(0) and negative indexing with it selects
# zero rows, which would silently empty the whole taxonomy table.
taxonomy_table <- taxonomy_table[!(taxonomy_table$domain %in% "Other"), ]

# Removing Chloroplast and Mitochondria Organelle DNA contamination.
# All ranks from domain to species are collapsed into one string per feature so
# that an organelle label at any rank is detected; the matching features are
# identified by their row names and then excluded from the table.
asvs2drop <- taxonomy_table %>%
  unite(col = "taxonomy", domain:species) %>%
  filter(str_detect(taxonomy, "[Cc]hloroplast|[Mm]itochondria")) %>%
  row.names()
taxonomy_table <- taxonomy_table[!(rownames(taxonomy_table) %in% asvs2drop), ]


# Get long asv taxonomy names and clean
species <- taxonomy_table %>%
unite(species,domain:species,sep = ";") %>% # Generalize this line --------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -346,9 +346,9 @@ publication_format <- theme_bw() +
group <- opt[["group"]] # "groups"
samples_column <- opt[["samples-column"]] # "Sample Name"
threads <- opt[["cpus"]] # 8
metadata_file <- here(opt[["metadata-table"]])
taxonomy_file <- here(opt[["taxonomy-table"]])
feature_table_file <- here(opt[["feature-table"]])
metadata_file <- opt[["metadata-table"]]
taxonomy_file <- opt[["taxonomy-table"]]
feature_table_file <- opt[["feature-table"]]
feature <- opt[["feature-type"]] # "ASV"
output_prefix <- opt[["output-prefix"]]
assay_suffix <- opt[["assay-suffix"]]
Expand All @@ -357,7 +357,7 @@ assay_suffix <- opt[["assay-suffix"]]
prevalence_cutoff <- opt[["prevalence-cutoff"]] # 0.15 (15%)
# sample / library read count cutoff
library_cutoff <- opt[["library-cutoff"]] # 100
diff_abund_out_dir <- here("differential_abundance/")
diff_abund_out_dir <- "differential_abundance/"
if(!dir.exists(diff_abund_out_dir)) dir.create(diff_abund_out_dir)

# ------------------------ Read metadata ---------------------------------- #
Expand Down Expand Up @@ -394,6 +394,13 @@ print(glue("There are {sum(taxonomy_table$domain == 'Other')} features without
# Dropping features that couldn't be assigned taxonomy.
# A logical mask is used instead of `-which(...)`: when no feature is labelled
# 'Other', `which()` returns integer(0) and negative indexing with it selects
# zero rows, which would silently empty the whole taxonomy table.
taxonomy_table <- taxonomy_table[!(taxonomy_table$domain %in% "Other"), ]

# Removing Chloroplast and Mitochondria Organelle DNA contamination.
# All ranks from domain to species are collapsed into one string per feature so
# that an organelle label at any rank is detected; the matching features are
# identified by their row names and then excluded from the table.
asvs2drop <- taxonomy_table %>%
  unite(col = "taxonomy", domain:species) %>%
  filter(str_detect(taxonomy, "[Cc]hloroplast|[Mm]itochondria")) %>%
  row.names()
taxonomy_table <- taxonomy_table[!(rownames(taxonomy_table) %in% asvs2drop), ]

# Get long asv taxonomy names and clean
species <- taxonomy_table %>%
unite(species,domain:species,sep = ";") %>% # Generalize this line --------
Expand Down Expand Up @@ -678,4 +685,4 @@ ggsave(filename = glue("{output_prefix}{feature}_boxplots{assay_suffix}.png"), p
)


message("Run completed sucessfully.")
# Final status line emitted once the script has finished without error.
message("Run completed successfully.")
Original file line number Diff line number Diff line change
Expand Up @@ -361,10 +361,10 @@ custom_palette <- custom_palette[-c(21:23, grep(pattern = pattern_to_filter,


# Required variables
metadata_file <- here(opt[["metadata-table"]])
features_file <- here(opt[["feature-table"]])
taxonomy_file <- here(opt[["taxonomy-table"]])
taxonomy_plots_out_dir <- here("taxonomy_plots/")
metadata_file <- opt[["metadata-table"]]
features_file <- opt[["feature-table"]]
taxonomy_file <- opt[["taxonomy-table"]]
taxonomy_plots_out_dir <- "taxonomy_plots/"
if(!dir.exists(taxonomy_plots_out_dir)) dir.create(taxonomy_plots_out_dir)
# Metadata group column name to compare
groups_colname <- opt[["group"]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,9 @@ library(DESeq2)
group <- opt[["group"]] # "groups"
samples_column <- opt[["samples-column"]] # "Sample Name"
threads <- opt[["cpus"]] # 8
metadata_file <- here(opt[["metadata-table"]])
taxonomy_file <- here(opt[["taxonomy-table"]])
feature_table_file <- here(opt[["feature-table"]])
metadata_file <- opt[["metadata-table"]]
taxonomy_file <- opt[["taxonomy-table"]]
feature_table_file <- opt[["feature-table"]]
feature <- opt[["feature-type"]] # "ASV"
output_prefix <- opt[["output-prefix"]]
assay_suffix <- opt[["assay-suffix"]]
Expand All @@ -141,7 +141,7 @@ assay_suffix <- opt[["assay-suffix"]]
prevalence_cutoff <- opt[["prevalence-cutoff"]] # 0.15 (15%)
# sample / library read count cutoff
library_cutoff <- opt[["library-cutoff"]] # 100
diff_abund_out_dir <- here("differential_abundance/")
diff_abund_out_dir <- "differential_abundance/"
if(!dir.exists(diff_abund_out_dir)) dir.create(diff_abund_out_dir)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ if (params.help) {
println("Required arguments:")
println("""-profile [STRING] What profile should be used to run the workflow. Options are [singularity, docker, conda, slurm].
singularity, docker and conda will run the pipeline locally using singularity, docker, and conda, respectively.
To combnine profiles, pass them together separated by comma. For example, to run jobs using slurm in singularity containers use 'slurm,singularity' . """)
To combine profiles, pass them together separated by comma. For example, to run jobs using slurm in singularity containers use 'slurm,singularity' . """)
println("--input_file [PATH] A 4-column (single-end) or 5-column (paired-end) input file (sample_id, forward, [reverse,] paired, groups). Mandatory if a GLDS accession is not provided.")
println(" Please see the files: SE_file.csv and PE_file.csv for single-end and paired-end examples, respectively.")
println(" The sample_id column should contain unique sample ids.")
Expand Down Expand Up @@ -66,7 +66,7 @@ if (params.help) {
println()
println("Diversity and Differential abundance testing parameters:")
println(" --diff_abund_method [STRING] The method to use for differential abundance testing. Either ['ancombc1', 'ancombc2', or 'deseq2'] respectively. Default: 'ancombc2' ")
println(" --rarefaction_depth [STRING] The Minimum desired sample rarefaction depth for diversity analysis. Default: 500.")
println(" --rarefaction_depth [INTEGER] The Minimum desired sample rarefaction depth for diversity analysis. Default: 500.")
println(" --group [STRING] Column in input csv file with treatments to be compared. Default: 'groups' ")
println(" --samples_column [STRING] Column in input csv file with sample names belonging to each treatment group. Default: 'sample_id' ")
println()
Expand Down

0 comments on commit 49ce1aa

Please sign in to comment.