-
Notifications
You must be signed in to change notification settings - Fork 43
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #123 from torres-alexis/DEV_GeneLab_Reference_Anno…
…tations_vGL-DPPD-7110-A [GL_RefAnnotTable] Add Docker/Singularity
- Loading branch information
Showing
5 changed files
with
141 additions
and
71 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -136,9 +136,9 @@ The default columns in the annotation table are: | |
| Program | Version | Relevant Links | | ||
|:----------------|:-------:|:---------------| | ||
| R | 4.4.0 | [https://www.r-project.org/](https://www.r-project.org/) | | ||
| Bioconductor | 3.19.1 | [https://bioconductor.org](https://bioconductor.org) | | ||
| Bioconductor | 3.19 | [https://bioconductor.org](https://bioconductor.org) | | ||
| tidyverse | 2.0.0 | [https://www.tidyverse.org](https://www.tidyverse.org) | | ||
| STRINGdb | 2.16.0 | [https://www.bioconductor.org/packages/release/bioc/html/STRINGdb.html](https://www.bioconductor.org/packages/release/bioc/html/STRINGdb.html) | | ||
| STRINGdb | 2.16.4 | [https://www.bioconductor.org/packages/release/bioc/html/STRINGdb.html](https://www.bioconductor.org/packages/release/bioc/html/STRINGdb.html) | | ||
| PANTHER.db | 1.0.12 | [https://bioconductor.org/packages/release/data/annotation/html/PANTHER.db.html](https://www.bioconductor.org/packages/release/data/annotation/html/PANTHER.db.html) | | ||
| rtracklayer | 1.64.0 | [https://bioconductor.org/packages/release/bioc/html/rtracklayer.html](https://www.bioconductor.org/packages/release/bioc/html/rtracklayer.html) | | ||
| org.At.tair.db | 3.19.1 | [https://bioconductor.org/packages/release/data/annotation/html/org.At.tair.db.html](https://www.bioconductor.org/packages/release/data/annotation/html/org.At.tair.db.html) | | ||
|
@@ -182,6 +182,10 @@ Current GeneLab annotation tables are available on [figshare](https://figshare.c | |
## 0. Set Up Environment | ||
|
||
```R | ||
# Set R library path to current working directory | ||
lib_path <- file.path(getwd()) | ||
.libPaths(lib_path) | ||
|
||
# Define variables associated with current pipeline and annotation table versions | ||
GL_DPPD_ID <- "GL-DPPD-7110-A" | ||
ref_tab_path <- "https://raw.githubusercontent.com/nasa/GeneLab_Data_Processing/master/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv" | ||
|
@@ -253,7 +257,7 @@ base_output_name <- str_replace(base_gtf_filename, ".gtf.gz", "") | |
|
||
# Add the species name to base_output_name if the reference source is not ENSEMBL | ||
if (!(ref_source %in% c("ensembl_plants", "ensembl_bacteria", "ensembl"))) { | ||
base_output_name <- paste(str_replace(target_species_designation, " ", "_"), base_output_name, sep = "_") | ||
base_output_name <- paste(str_replace(target_organism, " ", "_"), base_output_name, sep = "_") | ||
} | ||
|
||
out_table_filename <- paste0(base_output_name, "-GL-annotations.tsv") | ||
|
@@ -294,42 +298,52 @@ if ( file.exists(out_table_filename) ) { | |
# Use AnnotationForge's makeOrgPackageFromNCBI function with default settings to create the organism-specific org.db R package from available NCBI annotations | ||
|
||
# Try to download the org.db from Bioconductor, build it locally if installation fails | ||
BiocManager::install(target_org_db, ask = FALSE) | ||
if (!requireNamespace(target_org_db, quietly = TRUE)) { | ||
BiocManager::install(target_org_db, ask = FALSE) | ||
if (!requireNamespace(target_org_db, quietly = TRUE)) { | ||
tryCatch({ | ||
# Parse organism's name in the reference table to create the org.db name (target_org_db) | ||
genus_species <- strsplit(target_organism, " ")[[1]] | ||
if (length(genus_species) < 1) { | ||
stop("Species designation is not correctly formatted: ", target_organism) | ||
stop("Species designation is not correctly formatted: ", target_organism) | ||
} | ||
|
||
genus <- genus_species[1] | ||
species <- ifelse(length(genus_species) > 1, genus_species[2], "") | ||
strain <- ref_table %>% | ||
filter(species == target_organism) %>% | ||
pull(strain) %>% | ||
gsub("[^A-Za-z0-9]", "", .) | ||
|
||
if (!is.na(strain) && strain != "") { | ||
species <- paste0(species, strain) | ||
species <- paste0(species, strain) | ||
} | ||
|
||
# Get package name or build it if not provided | ||
target_org_db <- ref_table %>% | ||
filter(species == target_organism) %>% | ||
pull(annotations) | ||
|
||
if (is.na(target_org_db) || target_org_db == "") { | ||
cat("\nNo annotation database specified. Constructing package name...\n") | ||
target_org_db <- paste0("org.", substr(genus, 1, 1), species, ".eg.db") | ||
} | ||
target_org_db <- paste0("org.", substr(genus, 1, 1), species, ".eg.db") | ||
|
||
BiocManager::install(c("AnnotationForge", "biomaRt", "GO.db"), ask = FALSE) | ||
BiocManager::install(c("AnnotationForge", "biomaRt", "GO.db"), ask = FALSE) | ||
library(AnnotationForge) | ||
makeOrgPackageFromNCBI( | ||
version = "0.1", | ||
author = "Your Name <[email protected]>", | ||
maintainer = "Your Name <[email protected]>", | ||
outputDir = "./", | ||
tax_id = target_taxid, | ||
genus = genus, | ||
species = species | ||
makeOrgPackageFromNCBI( | ||
version = "0.1", | ||
author = "Your Name <[email protected]>", | ||
maintainer = "Your Name <[email protected]>", | ||
outputDir = "./", | ||
tax_id = target_taxid, | ||
genus = genus, | ||
species = species | ||
) | ||
install.packages(file.path("./", target_org_db), repos = NULL, type = "source", quiet = TRUE) | ||
cat(paste0("'", target_org_db, "' has been successfully built and installed.\n")) | ||
}, error = function(e) { | ||
stop("Failed to build and load the package: ", target_org_db, "\nError: ", e$message) | ||
stop("Failed to build and load the package: ", target_org_db, "\nError: ", e$message) | ||
}) | ||
target_org_db <- install_annotations(target_organism, ref_tab_path) | ||
} | ||
``` | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,35 +13,36 @@ install_annotations <- function(target_organism, refTablePath) { | |
filter(species == target_organism) %>% | ||
pull(taxon) | ||
|
||
# Parse organism's name in the reference table to create the org.db name (target_org_db) | ||
target_species_designation <- ref_table %>% | ||
filter(species == target_organism) %>% | ||
pull(species) %>% | ||
gsub("\\s+", " ", .) %>% | ||
gsub("[^A-Za-z0-9 ]", "", .) | ||
|
||
genus_species <- strsplit(target_species_designation, " ")[[1]] | ||
if (length(genus_species) < 1) { | ||
stop("Species designation is not correctly formatted: ", target_species_designation) | ||
} | ||
|
||
genus <- genus_species[1] | ||
species <- ifelse(length(genus_species) > 1, genus_species[2], "") | ||
strain <- ref_table %>% | ||
filter(species == target_organism) %>% | ||
pull(strain) %>% | ||
gsub("[^A-Za-z0-9]", "", .) | ||
|
||
if (!is.na(strain) && strain != "") { | ||
species <- paste0(species, strain) | ||
} | ||
|
||
# Get package name or build it if not provided | ||
target_org_db <- ref_table %>% | ||
filter(species == target_organism) %>% | ||
pull(annotations) | ||
|
||
if (is.na(target_org_db) || target_org_db == "") { | ||
cat("\nNo annotation database specified. Constructing package name...\n") | ||
target_species_designation <- ref_table %>% | ||
filter(species == target_organism) %>% | ||
pull(species) %>% | ||
gsub("\\s+", " ", .) %>% | ||
gsub("[^A-Za-z0-9 ]", "", .) | ||
|
||
genus_species <- strsplit(target_species_designation, " ")[[1]] | ||
if (length(genus_species) < 1) { | ||
stop("Species designation is not correctly formatted: ", target_species_designation) | ||
} | ||
|
||
genus <- genus_species[1] | ||
species <- ifelse(length(genus_species) > 1, genus_species[2], "") | ||
strain <- ref_table %>% | ||
filter(species == target_organism) %>% | ||
pull(strain) %>% | ||
gsub("[^A-Za-z0-9]", "", .) | ||
|
||
if (!is.na(strain) && strain != "") { | ||
species <- paste0(species, strain) | ||
} | ||
|
||
target_org_db <- paste0("org.", substr(genus, 1, 1), species, ".eg.db") | ||
} | ||
|
||
|
@@ -56,25 +57,25 @@ install_annotations <- function(target_organism, refTablePath) { | |
} else { | ||
cat(paste0("\nInstallation from Bioconductor failed, attempting to build '", target_org_db, "'...\n")) | ||
if (!dir.exists(target_org_db)) { | ||
tryCatch({ | ||
BiocManager::install(c("AnnotationForge", "biomaRt", "GO.db"), ask = FALSE) | ||
library(AnnotationForge) | ||
makeOrgPackageFromNCBI( | ||
version = "0.1", | ||
author = "Your Name <[email protected]>", | ||
maintainer = "Your Name <[email protected]>", | ||
outputDir = "./", | ||
tax_id = target_taxid, | ||
genus = genus, | ||
species = species | ||
) | ||
install.packages(file.path("./", target_org_db), repos = NULL, type = "source", quiet = TRUE) | ||
cat(paste0("'", target_org_db, "' has been successfully built and installed.\n")) | ||
}, error = function(e) { | ||
stop("Failed to build and load the package: ", target_org_db, "\nError: ", e$message) | ||
}) | ||
tryCatch({ | ||
BiocManager::install(c("AnnotationForge", "biomaRt", "GO.db"), ask = FALSE) | ||
library(AnnotationForge) | ||
makeOrgPackageFromNCBI( | ||
version = "0.1", | ||
author = "Your Name <[email protected]>", | ||
maintainer = "Your Name <[email protected]>", | ||
outputDir = "./", | ||
tax_id = target_taxid, | ||
genus = genus, | ||
species = species | ||
) | ||
install.packages(file.path("./", target_org_db), repos = NULL, type = "source", quiet = TRUE) | ||
cat(paste0("'", target_org_db, "' has been successfully built and installed.\n")) | ||
}, error = function(e) { | ||
stop("Failed to build and load the package: ", target_org_db, "\nError: ", e$message) | ||
}) | ||
} else { | ||
cat(paste0("Local annotation package ", target_org_db, " already exists. This local package will be installed.")) | ||
cat(paste0("Local annotation package ", target_org_db, " already exists. This local package will be installed.\n")) | ||
install.packages(file.path("./", target_org_db), repos = NULL, type = "source", quiet = TRUE) | ||
} | ||
} | ||
|
@@ -83,4 +84,4 @@ install_annotations <- function(target_organism, refTablePath) { | |
library(target_org_db, character.only = TRUE) | ||
cat(paste0("Using Annotation Database '", target_org_db, "'.\n")) | ||
return(target_org_db) | ||
} | ||
} |