From f015225fcee1d2743d9bc6e78892dc90a738df70 Mon Sep 17 00:00:00 2001 From: Alexis <71944751+torres-alexis@users.noreply.github.com> Date: Wed, 23 Oct 2024 13:42:34 -0700 Subject: [PATCH 01/12] remove lib path --- .../GL_RefAnnotTable-A/workflow_code/install-org-db.R | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R index f7e6f459..c1ad5613 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R +++ b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R @@ -1,9 +1,5 @@ # install-org-db.R -# Set R library path to current working directory -lib_path <- file.path(getwd()) -.libPaths(lib_path) - # Load required libraries library(tidyverse) library(AnnotationForge) @@ -116,4 +112,4 @@ if (!interactive()) { refTablePath <- if (length(args) > 1) args[2] else NULL install_annotations(target_organism, refTablePath) -} \ No newline at end of file +} From fce6a73a7389c12d1115c1b117533d478c3e6d31 Mon Sep 17 00:00:00 2001 From: Alexis <71944751+torres-alexis@users.noreply.github.com> Date: Wed, 23 Oct 2024 13:42:49 -0700 Subject: [PATCH 02/12] Update GL-DPPD-7110-A_build-genome-annots-tab.R remove lib path --- .../workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R index f6b043a7..f390d4b0 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R +++ 
b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R @@ -3,9 +3,6 @@ # GeneLab script for generating organism-specific gene annotation tables # Example usage: Rscript GL-DPPD-7110-A_build-genome-annots-tab.R 'Mus musculus' -# Set R library path to current working directory -lib_path <- file.path(getwd()) -.libPaths(lib_path) # Define variables associated with current pipeline and annotation table versions GL_DPPD_ID <- "GL-DPPD-7110-A" From 3cc61cffd8096840a89cb6cd44af6fca8bd24942 Mon Sep 17 00:00:00 2001 From: Alexis <71944751+torres-alexis@users.noreply.github.com> Date: Wed, 23 Oct 2024 14:27:30 -0700 Subject: [PATCH 03/12] Add possible paths to install-org-db execution function --- .../GL-DPPD-7110-A_build-genome-annots-tab.R | 39 ++++++++++++------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R index f390d4b0..38e6f08c 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R +++ b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R @@ -145,19 +145,41 @@ GTF <- data.frame(GTF) # Define a function to load the specified org.db package for a given target organism install_and_load_org_db <- function(target_organism, target_org_db, ref_tab_path) { + # Folder names for the script location: Parent directories or . for executing from parent dir or cd. 
+ ## No functionality to pull in the path of an executing R script is available + possible_folders <- c("workflow_code", "GL_RefAnnotTable-A_1.1.0", ".") + + # Get the current working directory and attempt to locate the correct folder + script_dir <- getwd() + + install_script_path <- NULL + + for (folder in possible_folders) { + potential_path <- file.path(script_dir, folder, "install-org-db.R") + if (file.exists(potential_path)) { + install_script_path <- potential_path + break + } + } + + # If the install script path was not found, stop with an error + if (is.null(install_script_path)) { + stop("Cannot find 'install-org-db.R' in the expected folders: 'workflow_code' or 'GL_RefAnnotTable-A_1.1.0'") + } + + # If target_org_db is provided, try to install it from Bioconductor if (!is.na(target_org_db) && target_org_db != "") { - # Attempt to install the package from Bioconductor BiocManager::install(target_org_db, ask = FALSE) # Check if the package was successfully loaded if (!requireNamespace(target_org_db, quietly = TRUE)) { - # If not, attempt to create it locally using a helper script - source("install-org-db.R") + # Source the install script to create the database locally + source(install_script_path) target_org_db <- install_annotations(target_organism, ref_tab_path) } } else { # If target_org_db is NA or empty, create it locally using the helper script - source("install-org-db.R") + source(install_script_path) target_org_db <- install_annotations(target_organism, ref_tab_path) } @@ -165,15 +187,6 @@ install_and_load_org_db <- function(target_organism, target_org_db, ref_tab_path library(target_org_db, character.only = TRUE) } -# Define list of supported organisms which do not use annotations from an org.db -no_org_db <- c("Lactobacillus acidophilus", "Mycobacterium marinum", "Oryza sativa", "Pseudomonas aeruginosa", - "Serratia liquefaciens", "Staphylococcus aureus", "Streptococcus mutans", "Vibrio fischeri") - -# Run the function unless the target_organism 
is in no_org_db -if (!(target_organism %in% no_org_db) && (target_organism %in% currently_accepted_orgs)) { - install_and_load_org_db(target_organism, target_org_db, ref_tab_path) -} - ############################################ ######## Build annotation table ############ From 8619121e019a0e12d8894c0847f75151aeaf3711 Mon Sep 17 00:00:00 2001 From: Alexis <71944751+torres-alexis@users.noreply.github.com> Date: Wed, 23 Oct 2024 14:30:43 -0700 Subject: [PATCH 04/12] Update GL-DPPD-7110-A_build-genome-annots-tab.R Store workflow version workflow_code folder name as a variable --- .../workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R index 38e6f08c..5c62dab1 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R +++ b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R @@ -6,6 +6,7 @@ # Define variables associated with current pipeline and annotation table versions GL_DPPD_ID <- "GL-DPPD-7110-A" +workflow_version <- "GL_RefAnnotTable-A_1.1.0" ref_tab_path <- "https://raw.githubusercontent.com/nasa/GeneLab_Data_Processing/master/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv" readme_path <- "https://github.com/nasa/GeneLab_Data_Processing/tree/master/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md" @@ -147,7 +148,7 @@ GTF <- data.frame(GTF) install_and_load_org_db <- function(target_organism, target_org_db, ref_tab_path) { # Folder names for the script location: Parent directories or . 
for executing from parent dir or cd. ## No functionality to pull in the path of an executing R script is available - possible_folders <- c("workflow_code", "GL_RefAnnotTable-A_1.1.0", ".") + possible_folders <- c("workflow_code", workflow_version, ".") # Get the current working directory and attempt to locate the correct folder script_dir <- getwd() From 8bbf66d43a274adf33132e3497289d91a3d140e6 Mon Sep 17 00:00:00 2001 From: Alexis <71944751+torres-alexis@users.noreply.github.com> Date: Wed, 23 Oct 2024 14:37:08 -0700 Subject: [PATCH 05/12] Update GL-DPPD-7110-A.md Add workflow version variable (for finding install-org-db path) to pipeline documentation --- .../GL-DPPD-7110-A/GL-DPPD-7110-A.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A.md b/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A.md index bf6ffa64..561fe596 100644 --- a/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A.md +++ b/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A.md @@ -188,6 +188,8 @@ lib_path <- file.path(getwd()) # Define variables associated with current pipeline and annotation table versions GL_DPPD_ID <- "GL-DPPD-7110-A" +workflow_version <- "GL_RefAnnotTable-A_1.1.0" + ref_tab_path <- "https://raw.githubusercontent.com/nasa/GeneLab_Data_Processing/master/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv" readme_path <- "https://github.com/nasa/GeneLab_Data_Processing/tree/master/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md" @@ -213,6 +215,7 @@ library(rtracklayer) **Output Data:** - `GL_DPPD_ID` (variable specifying the GeneLab Data Processing Pipeline Document ID) +- `workflow_version (variable specifying the current version of the workflow) - `ref_tab_path` (variable 
specifying the path to the reference table CSV file) - `readme_path` (variable specifying the path to the README file) - `currently_accepted_orgs` (variable specifying the list of currently supported organisms) From 4bec1931ace9529019f86859f843e3c9a3150a4f Mon Sep 17 00:00:00 2001 From: Alexis <71944751+torres-alexis@users.noreply.github.com> Date: Wed, 23 Oct 2024 20:13:52 -0700 Subject: [PATCH 06/12] Update GL-DPPD-7110-A_build-genome-annots-tab.R readd line --- .../GL-DPPD-7110-A_build-genome-annots-tab.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R index 5c62dab1..afbccce8 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R +++ b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R @@ -188,6 +188,14 @@ install_and_load_org_db <- function(target_organism, target_org_db, ref_tab_path library(target_org_db, character.only = TRUE) } +# Define list of supported organisms which do not use annotations from an org.db +no_org_db <- c("Lactobacillus acidophilus", "Mycobacterium marinum", "Oryza sativa", "Pseudomonas aeruginosa", + "Serratia liquefaciens", "Staphylococcus aureus", "Streptococcus mutans", "Vibrio fischeri") + +# Run the function unless the target_organism is in no_org_db +if (!(target_organism %in% no_org_db) && (target_organism %in% currently_accepted_orgs)) { + install_and_load_org_db(target_organism, target_org_db, ref_tab_path) +} ############################################ ######## Build annotation table ############ From d1ea649bde96b2e317adf3bc4ddbdc88a480227b Mon Sep 17 00:00:00 2001 From: torres-alexis Date: Mon, 28 Oct 
2024 23:44:04 -0700 Subject: [PATCH 07/12] remove custom org dbs from annotation table --- .../GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv b/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv index 5ce006d9..12a2c8b8 100644 --- a/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv +++ b/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv @@ -1,23 +1,23 @@ name,species,strain,ensemblVersion,ref_source,fasta,gtf,taxon,annotations,genelab_annots_link,genelab_annots_info_link ARABIDOPSIS,Arabidopsis thaliana,,59,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/fasta/arabidopsis_thaliana/dna/Arabidopsis_thaliana.TAIR10.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/gtf/arabidopsis_thaliana/Arabidopsis_thaliana.TAIR10.59.gtf.gz,3702,org.At.tair.db,https://figshare.com/ndownloader/files/48354355,https://figshare.com/ndownloader/files/48354352 -BACSU,Bacillus subtilis,subsp. 
subtilis 168,59,ensembl_bacteria,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/fasta/bacteria_0_collection/bacillus_subtilis_subsp_subtilis_str_168_gca_000009045/dna/Bacillus_subtilis_subsp_subtilis_str_168_gca_000009045.ASM904v1.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/gtf/bacteria_0_collection/bacillus_subtilis_subsp_subtilis_str_168_gca_000009045/Bacillus_subtilis_subsp_subtilis_str_168_gca_000009045.ASM904v1.59.gtf.gz,224308,org.Bsubtilissubspsubtilis168.eg.db,https://figshare.com/ndownloader/files/48354346,https://figshare.com/ndownloader/files/48354349 -BRADI,Brachypodium distachyon,,59,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/fasta/brachypodium_distachyon/dna/Brachypodium_distachyon.Brachypodium_distachyon_v3.0.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/gtf/brachypodium_distachyon/Brachypodium_distachyon.Brachypodium_distachyon_v3.0.59.gtf.gz,15368,org.Bdistachyon.eg.db,https://figshare.com/ndownloader/files/48354370,https://figshare.com/ndownloader/files/48354361 +BACSU,Bacillus subtilis,subsp. 
subtilis 168,59,ensembl_bacteria,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/fasta/bacteria_0_collection/bacillus_subtilis_subsp_subtilis_str_168_gca_000009045/dna/Bacillus_subtilis_subsp_subtilis_str_168_gca_000009045.ASM904v1.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/gtf/bacteria_0_collection/bacillus_subtilis_subsp_subtilis_str_168_gca_000009045/Bacillus_subtilis_subsp_subtilis_str_168_gca_000009045.ASM904v1.59.gtf.gz,224308,,https://figshare.com/ndownloader/files/48354346,https://figshare.com/ndownloader/files/48354349 +BRADI,Brachypodium distachyon,,59,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/fasta/brachypodium_distachyon/dna/Brachypodium_distachyon.Brachypodium_distachyon_v3.0.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/gtf/brachypodium_distachyon/Brachypodium_distachyon.Brachypodium_distachyon_v3.0.59.gtf.gz,15368,,https://figshare.com/ndownloader/files/48354370,https://figshare.com/ndownloader/files/48354361 BRARP,Brassica rapa,,59,ensembl_plants,http://ftp.ensemblgenomes.org/pub/plants/release-59/fasta/brassica_rapa/dna/Brassica_rapa.Brapa_1.0.dna.toplevel.fa.gz,http://ftp.ensemblgenomes.org/pub/plants/release-59/gtf/brassica_rapa/Brassica_rapa.Brapa_1.0.59.gtf.gz,,,, WORM,Caenorhabditis elegans,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/caenorhabditis_elegans/dna/Caenorhabditis_elegans.WBcel235.dna.toplevel.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/caenorhabditis_elegans/Caenorhabditis_elegans.WBcel235.112.gtf.gz,6239,org.Ce.eg.db,https://figshare.com/ndownloader/files/48354373,https://figshare.com/ndownloader/files/48354364 ZEBRAFISH,Danio 
rerio,,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/danio_rerio/dna/Danio_rerio.GRCz11.dna.primary_assembly.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/danio_rerio/Danio_rerio.GRCz11.112.gtf.gz,7955,org.Dr.eg.db,https://figshare.com/ndownloader/files/48354388,https://figshare.com/ndownloader/files/48354367 FLY,Drosophila melanogaster,,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/drosophila_melanogaster/dna/Drosophila_melanogaster.BDGP6.46.dna.toplevel.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP6.46.112.gtf.gz,7227,org.Dm.eg.db,https://figshare.com/ndownloader/files/48354382,https://figshare.com/ndownloader/files/48354376 ERCC,,,,ThermoFisher,https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip,https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip,,,, -ECOLI,Escherichia coli,str. K-12 substr. MG1655,59,ensembl_bacteria,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/fasta/bacteria_0_collection/escherichia_coli_str_k_12_substr_mg1655_gca_000005845/dna/Escherichia_coli_str_k_12_substr_mg1655_gca_000005845.ASM584v2.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/gtf/bacteria_0_collection/escherichia_coli_str_k_12_substr_mg1655_gca_000005845/Escherichia_coli_str_k_12_substr_mg1655_gca_000005845.ASM584v2.59.gtf.gz,511145,org.EcolistrK12substrMG1655.eg.db,https://figshare.com/ndownloader/files/48354379,https://figshare.com/ndownloader/files/48354394 +ECOLI,Escherichia coli,str. K-12 substr. 
MG1655,59,ensembl_bacteria,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/fasta/bacteria_0_collection/escherichia_coli_str_k_12_substr_mg1655_gca_000005845/dna/Escherichia_coli_str_k_12_substr_mg1655_gca_000005845.ASM584v2.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/gtf/bacteria_0_collection/escherichia_coli_str_k_12_substr_mg1655_gca_000005845/Escherichia_coli_str_k_12_substr_mg1655_gca_000005845.ASM584v2.59.gtf.gz,511145,,https://figshare.com/ndownloader/files/48354379,https://figshare.com/ndownloader/files/48354394 HUMAN,Homo sapiens,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/homo_sapiens/Homo_sapiens.GRCh38.112.gtf.gz,9606,org.Hs.eg.db,https://figshare.com/ndownloader/files/48354445,https://figshare.com/ndownloader/files/48354448 ,Lactobacillus acidophilus,NCFM,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/985/GCF_000011985.1_ASM1198v1/GCF_000011985.1_ASM1198v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/985/GCF_000011985.1_ASM1198v1/GCF_000011985.1_ASM1198v1_genomic.gtf.gz,272621,,https://figshare.com/ndownloader/files/49061254,https://figshare.com/ndownloader/files/49061257 MOUSE,Mus musculus,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.primary_assembly.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/mus_musculus/Mus_musculus.GRCm39.112.gtf.gz,10090,org.Mm.eg.db,https://figshare.com/ndownloader/files/48354460,https://figshare.com/ndownloader/files/48354457 ,Mycobacterium 
marinum,M,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/018/345/GCF_000018345.1_ASM1834v1/GCF_000018345.1_ASM1834v1_genomic.gtf.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/018/345/GCF_000018345.1_ASM1834v1/GCF_000018345.1_ASM1834v1_genomic.gtf.gz,216594,,https://figshare.com/ndownloader/files/49061260,https://figshare.com/ndownloader/files/49061263 ORYSJ,Oryza sativa,Japonica,59,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/fasta/oryza_sativa/dna/Oryza_sativa.IRGSP-1.0.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/gtf/oryza_sativa/Oryza_sativa.IRGSP-1.0.59.gtf.gz,39947,,https://figshare.com/ndownloader/files/48354451,https://figshare.com/ndownloader/files/48354454 -ORYLA,Oryzias latipes,,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/oryzias_latipes/dna/Oryzias_latipes.ASM223467v1.dna.toplevel.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/oryzias_latipes/Oryzias_latipes.ASM223467v1.112.gtf.gz,8090,org.Olatipes.eg.db,https://figshare.com/ndownloader/files/48354463,https://figshare.com/ndownloader/files/48354466 +ORYLA,Oryzias latipes,,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/oryzias_latipes/dna/Oryzias_latipes.ASM223467v1.dna.toplevel.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/oryzias_latipes/Oryzias_latipes.ASM223467v1.112.gtf.gz,8090,,https://figshare.com/ndownloader/files/48354463,https://figshare.com/ndownloader/files/48354466 ,Pseudomonas aeruginosa,UCBPP-PA14,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/014/625/GCF_000014625.1_ASM1462v1/GCF_000014625.1_ASM1462v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/014/625/GCF_000014625.1_ASM1462v1/GCF_000014625.1_ASM1462v1_genomic.gtf.gz,208963,,https://figshare.com/ndownloader/files/49061266,https://figshare.com/ndownloader/files/49061269 RAT,Rattus 
norvegicus,,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/rattus_norvegicus/dna/Rattus_norvegicus.mRatBN7.2.dna.toplevel.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/rattus_norvegicus/Rattus_norvegicus.mRatBN7.2.112.gtf.gz,10116,org.Rn.eg.db,https://figshare.com/ndownloader/files/48354472,https://figshare.com/ndownloader/files/48354475 YEAST,Saccharomyces cerevisiae,S288C,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/saccharomyces_cerevisiae/dna/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/saccharomyces_cerevisiae/Saccharomyces_cerevisiae.R64-1-1.112.gtf.gz,559292,org.Sc.sgd.db,https://figshare.com/ndownloader/files/48354469,https://figshare.com/ndownloader/files/48354478 -SALTY,Salmonella enterica,serovar Typhimurium str. LT2,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/006/945/GCF_000006945.2_ASM694v2/GCF_000006945.2_ASM694v2_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/006/945/GCF_000006945.2_ASM694v2/GCF_000006945.2_ASM694v2_genomic.gtf.gz,99287,org.SentericaserovarTyphimuriumstrLT2.eg.db,https://figshare.com/ndownloader/files/49061272,https://figshare.com/ndownloader/files/49061275 +SALTY,Salmonella enterica,serovar Typhimurium str. 
LT2,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/006/945/GCF_000006945.2_ASM694v2/GCF_000006945.2_ASM694v2_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/006/945/GCF_000006945.2_ASM694v2/GCF_000006945.2_ASM694v2_genomic.gtf.gz,99287,,https://figshare.com/ndownloader/files/49061272,https://figshare.com/ndownloader/files/49061275 ,Serratia liquefaciens,ATCC 27592,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/422/085/GCF_000422085.1_ASM42208v1/GCF_000422085.1_ASM42208v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/422/085/GCF_000422085.1_ASM42208v1/GCF_000422085.1_ASM42208v1_genomic.gtf.gz,1346614,,https://figshare.com/ndownloader/files/49061278,https://figshare.com/ndownloader/files/49061281 ,Staphylococcus aureus,MRSA252,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/505/GCF_000011505.1_ASM1150v1/GCF_000011505.1_ASM1150v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/505/GCF_000011505.1_ASM1150v1/GCF_000011505.1_ASM1150v1_genomic.gtf.gz,282458,,https://figshare.com/ndownloader/files/49061284,https://figshare.com/ndownloader/files/49061287 ,Streptococcus mutans,UA159,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/007/465/GCF_000007465.2_ASM746v2/GCF_000007465.2_ASM746v2_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/007/465/GCF_000007465.2_ASM746v2/GCF_000007465.2_ASM746v2_genomic.gtf.gz,210007,,https://figshare.com/ndownloader/files/49061290,https://figshare.com/ndownloader/files/49061293 From f06cf149b9048c2eb6935c8398802ef3ef49e52f Mon Sep 17 00:00:00 2001 From: torres-alexis Date: Wed, 30 Oct 2024 13:38:21 -0700 Subject: [PATCH 08/12] move timeout to top of scripts, add to readme --- .../GL_RefAnnotTable-A/README.md | 4 ++++ .../GL-DPPD-7110-A_build-genome-annots-tab.R | 15 ++++++--------- .../workflow_code/install-org-db.R | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git 
a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md index e3990af5..2b84e200 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md +++ b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md @@ -54,6 +54,8 @@ The GL_RefAnnotTable-A workflow can be run using one of two approaches: Please follow the instructions for the approach that best matches your setup and preferences. Each method is explained in detail below. +> **Note**: If you encounter timeout errors, you can increase the default timeout (3600 seconds) by modifying the `options(timeout=3600)` line at the top of the `GL-DPPD-7110-A_build-genome-annots-tab.R` script. +
--- @@ -198,6 +200,8 @@ The input and output data are the same for both [Approach 1: Using Singularity]( If the reference table does not specify an annotations database for the target organism in the 'annotations' column of the [GL-DPPD-7110-A_annotations.csv](../../Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv) file, the `install_annotations` function (defined in `install-org-db.R`) will be executed by default. This function can also be run as a stand-alone script: +> **Note**: If you encounter timeout errors, you can increase the default timeout (3600 seconds) by modifying the `options(timeout=3600)` line at the top of the `install-org-db.R` script. +
#### Using Singularity diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R index afbccce8..1254920a 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R +++ b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R @@ -2,7 +2,7 @@ # Written by Mike Lee # GeneLab script for generating organism-specific gene annotation tables # Example usage: Rscript GL-DPPD-7110-A_build-genome-annots-tab.R 'Mus musculus' - +options(timeout = 3600) # Define variables associated with current pipeline and annotation table versions GL_DPPD_ID <- "GL-DPPD-7110-A" @@ -80,9 +80,6 @@ library(rtracklayer) ############## Define variables and output file names ################### ######################################################################### -# Set timeout time to ensure annotation file downloads will complete -options(timeout = 600) - ref_table <- tryCatch( read.csv(ref_tab_path), error = function(e) { @@ -133,9 +130,6 @@ if ( file.exists(out_table_filename) ) { ######## Load annotation databases ######### ############################################# -# Set timeout time to ensure annotation file downloads will complete -options(timeout = 600) - ####### GTF ########## # Create the GTF dataframe from its path, unique gene identities in the reference assembly are under 'gene_id' @@ -186,15 +180,18 @@ install_and_load_org_db <- function(target_organism, target_org_db, ref_tab_path # Load the package into the R session library(target_org_db, character.only = TRUE) + + # Return the target_org_db name + return(target_org_db) } # Define list of supported organisms which do not use annotations from an org.db no_org_db <- 
c("Lactobacillus acidophilus", "Mycobacterium marinum", "Oryza sativa", "Pseudomonas aeruginosa", "Serratia liquefaciens", "Staphylococcus aureus", "Streptococcus mutans", "Vibrio fischeri") -# Run the function unless the target_organism is in no_org_db +# Run the function unless the target_organism is in no_org_db and update target_org_db with the result if (!(target_organism %in% no_org_db) && (target_organism %in% currently_accepted_orgs)) { - install_and_load_org_db(target_organism, target_org_db, ref_tab_path) + target_org_db <- install_and_load_org_db(target_organism, target_org_db, ref_tab_path) } ############################################ diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R index c1ad5613..fb8fe1a2 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R +++ b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R @@ -1,5 +1,5 @@ # install-org-db.R - +options(timeout=3600) # Load required libraries library(tidyverse) library(AnnotationForge) From 5088539f028bfe75a466474b488ef86e56e83e2c Mon Sep 17 00:00:00 2001 From: torres-alexis Date: Wed, 30 Oct 2024 13:41:59 -0700 Subject: [PATCH 09/12] add no-home + bind local path to same container path --- .../Workflow_Documentation/GL_RefAnnotTable-A/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md index 2b84e200..fdbe9f11 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md +++ b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md @@ -106,9 +106,9 @@ While in 
the directory containing the `GL_RefAnnotTable-A_1.1.0` folder that was ```bash -singularity exec -B $(pwd)/GL_RefAnnotTable-A_1.1.0:/work \ +singularity exec --no-home -B $(pwd)/GL_RefAnnotTable-A_1.1.0:$(pwd)/GL_RefAnnotTable-A_1.1.0 \ $SINGULARITY_CACHEDIR/quay.io-nasa_genelab-gl-refannottable-a-1.1.0.img \ -Rscript /work/GL-DPPD-7110-A_build-genome-annots-tab.R 'Mus musculus' +Rscript GL_RefAnnotTable-A_1.1.0/GL-DPPD-7110-A_build-genome-annots-tab.R 'Mus musculus' ```
@@ -207,9 +207,9 @@ If the reference table does not specify an annotations database for the target o #### Using Singularity ```bash -singularity exec -B $(pwd)/GL_RefAnnotTable-A_1.1.0:/work \ +singularity exec --no-home -B $(pwd)/GL_RefAnnotTable-A_1.1.0:$(pwd)/GL_RefAnnotTable-A_1.1.0 \ $SINGULARITY_CACHEDIR/quay.io-nasa_genelab-gl-refannottable-a-1.1.0.img \ -Rscript /work/install-org-db.R 'Bacillus subtilis' +Rscript GL_RefAnnotTable-A_1.1.0/install-org-db.R 'Bacillus subtilis' ```
From 63683814c083e4ac7fab50e5735f6165a6b2ddbf Mon Sep 17 00:00:00 2001 From: torres-alexis Date: Wed, 30 Oct 2024 19:08:22 -0700 Subject: [PATCH 10/12] add cols bioconductor_annotations, custom_annotations, change dppd var workflow_version --- .../GL-DPPD-7110-A/GL-DPPD-7110-A.md | 8 ++-- .../GL-DPPD-7110-A_annotations.csv | 48 +++++++++---------- .../GL-DPPD-7110-A_build-genome-annots-tab.R | 2 +- .../workflow_code/install-org-db.R | 2 +- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A.md b/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A.md index 561fe596..0fba4029 100644 --- a/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A.md +++ b/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A.md @@ -188,7 +188,7 @@ lib_path <- file.path(getwd()) # Define variables associated with current pipeline and annotation table versions GL_DPPD_ID <- "GL-DPPD-7110-A" -workflow_version <- "GL_RefAnnotTable-A_1.1.0" +workflow_version <- "" ref_tab_path <- "https://raw.githubusercontent.com/nasa/GeneLab_Data_Processing/master/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv" readme_path <- "https://github.com/nasa/GeneLab_Data_Processing/tree/master/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md" @@ -215,7 +215,7 @@ library(rtracklayer) **Output Data:** - `GL_DPPD_ID` (variable specifying the GeneLab Data Processing Pipeline Document ID) -- `workflow_version (variable specifying the current version of the workflow) +- `workflow_version` (variable specifying the [current version of the 
workflow](https://github.com/nasa/GeneLab_Data_Processing/tree/DEV_GeneLab_Reference_Annotations_vGL-DPPD-7110-A/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A)) - `ref_tab_path` (variable specifying the path to the reference table CSV file) - `readme_path` (variable specifying the path to the README file) - `currently_accepted_orgs` (variable specifying the list of currently supported organisms) @@ -244,7 +244,7 @@ target_info <- ref_table %>% # Extract the relevant columns from the reference table target_taxid <- target_info$taxon # Taxonomic identifier -target_org_db <- target_info$annotations # org.eg.db R package +target_org_db <- target_info$bioconductor_annotations # org.eg.db R package gtf_link <- target_info$gtf # Path to reference assembly GTF target_short_name <- target_info$name # PANTHER / UNIPROT short name; blank if not available ref_source <- target_info$ref_source # Reference files source @@ -284,7 +284,7 @@ if ( file.exists(out_table_filename) ) { **Output Data:** - `target_taxid` (variable specifying the taxonomic identifier for the target organism) -- `target_org_db` (variable specifying the name of the org.db R package for the target organism) +- `target_org_db` (variable specifying the name of the org.eg.db R package for the target organism if it is hosted by Bioconductor) - `gtf_link` (variable specifying the URL to the GTF file for the target organism) - `target_short_name` (variable specifying the PANTHER/UNIPROT short name for the target organism) - `ref_source` (variable specifying the source of the reference files, e.g., "ensembl", "ensembl_plants", "ensembl_bacteria", "ncbi") diff --git a/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv b/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv index 12a2c8b8..c2a881e2 100644 --- 
a/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv +++ b/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv @@ -1,24 +1,24 @@ -name,species,strain,ensemblVersion,ref_source,fasta,gtf,taxon,annotations,genelab_annots_link,genelab_annots_info_link -ARABIDOPSIS,Arabidopsis thaliana,,59,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/fasta/arabidopsis_thaliana/dna/Arabidopsis_thaliana.TAIR10.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/gtf/arabidopsis_thaliana/Arabidopsis_thaliana.TAIR10.59.gtf.gz,3702,org.At.tair.db,https://figshare.com/ndownloader/files/48354355,https://figshare.com/ndownloader/files/48354352 -BACSU,Bacillus subtilis,subsp. subtilis 168,59,ensembl_bacteria,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/fasta/bacteria_0_collection/bacillus_subtilis_subsp_subtilis_str_168_gca_000009045/dna/Bacillus_subtilis_subsp_subtilis_str_168_gca_000009045.ASM904v1.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/gtf/bacteria_0_collection/bacillus_subtilis_subsp_subtilis_str_168_gca_000009045/Bacillus_subtilis_subsp_subtilis_str_168_gca_000009045.ASM904v1.59.gtf.gz,224308,,https://figshare.com/ndownloader/files/48354346,https://figshare.com/ndownloader/files/48354349 -BRADI,Brachypodium distachyon,,59,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/fasta/brachypodium_distachyon/dna/Brachypodium_distachyon.Brachypodium_distachyon_v3.0.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/gtf/brachypodium_distachyon/Brachypodium_distachyon.Brachypodium_distachyon_v3.0.59.gtf.gz,15368,,https://figshare.com/ndownloader/files/48354370,https://figshare.com/ndownloader/files/48354361 -BRARP,Brassica 
rapa,,59,ensembl_plants,http://ftp.ensemblgenomes.org/pub/plants/release-59/fasta/brassica_rapa/dna/Brassica_rapa.Brapa_1.0.dna.toplevel.fa.gz,http://ftp.ensemblgenomes.org/pub/plants/release-59/gtf/brassica_rapa/Brassica_rapa.Brapa_1.0.59.gtf.gz,,,, -WORM,Caenorhabditis elegans,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/caenorhabditis_elegans/dna/Caenorhabditis_elegans.WBcel235.dna.toplevel.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/caenorhabditis_elegans/Caenorhabditis_elegans.WBcel235.112.gtf.gz,6239,org.Ce.eg.db,https://figshare.com/ndownloader/files/48354373,https://figshare.com/ndownloader/files/48354364 -ZEBRAFISH,Danio rerio,,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/danio_rerio/dna/Danio_rerio.GRCz11.dna.primary_assembly.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/danio_rerio/Danio_rerio.GRCz11.112.gtf.gz,7955,org.Dr.eg.db,https://figshare.com/ndownloader/files/48354388,https://figshare.com/ndownloader/files/48354367 -FLY,Drosophila melanogaster,,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/drosophila_melanogaster/dna/Drosophila_melanogaster.BDGP6.46.dna.toplevel.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP6.46.112.gtf.gz,7227,org.Dm.eg.db,https://figshare.com/ndownloader/files/48354382,https://figshare.com/ndownloader/files/48354376 -ERCC,,,,ThermoFisher,https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip,https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip,,,, -ECOLI,Escherichia coli,str. K-12 substr. 
MG1655,59,ensembl_bacteria,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/fasta/bacteria_0_collection/escherichia_coli_str_k_12_substr_mg1655_gca_000005845/dna/Escherichia_coli_str_k_12_substr_mg1655_gca_000005845.ASM584v2.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/gtf/bacteria_0_collection/escherichia_coli_str_k_12_substr_mg1655_gca_000005845/Escherichia_coli_str_k_12_substr_mg1655_gca_000005845.ASM584v2.59.gtf.gz,511145,,https://figshare.com/ndownloader/files/48354379,https://figshare.com/ndownloader/files/48354394 -HUMAN,Homo sapiens,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/homo_sapiens/Homo_sapiens.GRCh38.112.gtf.gz,9606,org.Hs.eg.db,https://figshare.com/ndownloader/files/48354445,https://figshare.com/ndownloader/files/48354448 -,Lactobacillus acidophilus,NCFM,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/985/GCF_000011985.1_ASM1198v1/GCF_000011985.1_ASM1198v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/985/GCF_000011985.1_ASM1198v1/GCF_000011985.1_ASM1198v1_genomic.gtf.gz,272621,,https://figshare.com/ndownloader/files/49061254,https://figshare.com/ndownloader/files/49061257 -MOUSE,Mus musculus,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.primary_assembly.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/mus_musculus/Mus_musculus.GRCm39.112.gtf.gz,10090,org.Mm.eg.db,https://figshare.com/ndownloader/files/48354460,https://figshare.com/ndownloader/files/48354457 -,Mycobacterium 
marinum,M,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/018/345/GCF_000018345.1_ASM1834v1/GCF_000018345.1_ASM1834v1_genomic.gtf.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/018/345/GCF_000018345.1_ASM1834v1/GCF_000018345.1_ASM1834v1_genomic.gtf.gz,216594,,https://figshare.com/ndownloader/files/49061260,https://figshare.com/ndownloader/files/49061263 -ORYSJ,Oryza sativa,Japonica,59,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/fasta/oryza_sativa/dna/Oryza_sativa.IRGSP-1.0.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/gtf/oryza_sativa/Oryza_sativa.IRGSP-1.0.59.gtf.gz,39947,,https://figshare.com/ndownloader/files/48354451,https://figshare.com/ndownloader/files/48354454 -ORYLA,Oryzias latipes,,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/oryzias_latipes/dna/Oryzias_latipes.ASM223467v1.dna.toplevel.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/oryzias_latipes/Oryzias_latipes.ASM223467v1.112.gtf.gz,8090,,https://figshare.com/ndownloader/files/48354463,https://figshare.com/ndownloader/files/48354466 -,Pseudomonas aeruginosa,UCBPP-PA14,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/014/625/GCF_000014625.1_ASM1462v1/GCF_000014625.1_ASM1462v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/014/625/GCF_000014625.1_ASM1462v1/GCF_000014625.1_ASM1462v1_genomic.gtf.gz,208963,,https://figshare.com/ndownloader/files/49061266,https://figshare.com/ndownloader/files/49061269 -RAT,Rattus norvegicus,,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/rattus_norvegicus/dna/Rattus_norvegicus.mRatBN7.2.dna.toplevel.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/rattus_norvegicus/Rattus_norvegicus.mRatBN7.2.112.gtf.gz,10116,org.Rn.eg.db,https://figshare.com/ndownloader/files/48354472,https://figshare.com/ndownloader/files/48354475 -YEAST,Saccharomyces 
cerevisiae,S288C,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/saccharomyces_cerevisiae/dna/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/saccharomyces_cerevisiae/Saccharomyces_cerevisiae.R64-1-1.112.gtf.gz,559292,org.Sc.sgd.db,https://figshare.com/ndownloader/files/48354469,https://figshare.com/ndownloader/files/48354478 -SALTY,Salmonella enterica,serovar Typhimurium str. LT2,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/006/945/GCF_000006945.2_ASM694v2/GCF_000006945.2_ASM694v2_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/006/945/GCF_000006945.2_ASM694v2/GCF_000006945.2_ASM694v2_genomic.gtf.gz,99287,,https://figshare.com/ndownloader/files/49061272,https://figshare.com/ndownloader/files/49061275 -,Serratia liquefaciens,ATCC 27592,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/422/085/GCF_000422085.1_ASM42208v1/GCF_000422085.1_ASM42208v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/422/085/GCF_000422085.1_ASM42208v1/GCF_000422085.1_ASM42208v1_genomic.gtf.gz,1346614,,https://figshare.com/ndownloader/files/49061278,https://figshare.com/ndownloader/files/49061281 -,Staphylococcus aureus,MRSA252,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/505/GCF_000011505.1_ASM1150v1/GCF_000011505.1_ASM1150v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/505/GCF_000011505.1_ASM1150v1/GCF_000011505.1_ASM1150v1_genomic.gtf.gz,282458,,https://figshare.com/ndownloader/files/49061284,https://figshare.com/ndownloader/files/49061287 -,Streptococcus mutans,UA159,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/007/465/GCF_000007465.2_ASM746v2/GCF_000007465.2_ASM746v2_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/007/465/GCF_000007465.2_ASM746v2/GCF_000007465.2_ASM746v2_genomic.gtf.gz,210007,,https://figshare.com/ndownloader/files/49061290,https://figshare.com/ndownloader/files/49061293 -,Vibrio 
fischeri,ES114,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/805/GCF_000011805.1_ASM1180v1/GCF_000011805.1_ASM1180v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/805/GCF_000011805.1_ASM1180v1/GCF_000011805.1_ASM1180v1_genomic.gtf.gz,312309,,https://figshare.com/ndownloader/files/49061296,https://figshare.com/ndownloader/files/49061299 \ No newline at end of file +name,species,strain,ensemblVersion,ref_source,fasta,gtf,taxon,bioconductor_annotations,custom_annotations,genelab_annots_link,genelab_annots_info_link +ARABIDOPSIS,Arabidopsis thaliana,,59,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/fasta/arabidopsis_thaliana/dna/Arabidopsis_thaliana.TAIR10.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/gtf/arabidopsis_thaliana/Arabidopsis_thaliana.TAIR10.59.gtf.gz,3702,org.At.tair.db,,https://figshare.com/ndownloader/files/48354355,https://figshare.com/ndownloader/files/48354352 +BACSU,Bacillus subtilis,subsp. 
subtilis 168,59,ensembl_bacteria,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/fasta/bacteria_0_collection/bacillus_subtilis_subsp_subtilis_str_168_gca_000009045/dna/Bacillus_subtilis_subsp_subtilis_str_168_gca_000009045.ASM904v1.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/gtf/bacteria_0_collection/bacillus_subtilis_subsp_subtilis_str_168_gca_000009045/Bacillus_subtilis_subsp_subtilis_str_168_gca_000009045.ASM904v1.59.gtf.gz,224308,,org.Bsubtilissubspsubtilis168.eg.db,https://figshare.com/ndownloader/files/48354346,https://figshare.com/ndownloader/files/48354349 +BRADI,Brachypodium distachyon,,59,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/fasta/brachypodium_distachyon/dna/Brachypodium_distachyon.Brachypodium_distachyon_v3.0.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/gtf/brachypodium_distachyon/Brachypodium_distachyon.Brachypodium_distachyon_v3.0.59.gtf.gz,15368,,org.Bdistachyon.eg.db,https://figshare.com/ndownloader/files/48354370,https://figshare.com/ndownloader/files/48354361 +BRARP,Brassica rapa,,59,ensembl_plants,http://ftp.ensemblgenomes.org/pub/plants/release-59/fasta/brassica_rapa/dna/Brassica_rapa.Brapa_1.0.dna.toplevel.fa.gz,http://ftp.ensemblgenomes.org/pub/plants/release-59/gtf/brassica_rapa/Brassica_rapa.Brapa_1.0.59.gtf.gz,,,,, +WORM,Caenorhabditis elegans,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/caenorhabditis_elegans/dna/Caenorhabditis_elegans.WBcel235.dna.toplevel.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/caenorhabditis_elegans/Caenorhabditis_elegans.WBcel235.112.gtf.gz,6239,org.Ce.eg.db,,https://figshare.com/ndownloader/files/48354373,https://figshare.com/ndownloader/files/48354364 +ZEBRAFISH,Danio 
rerio,,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/danio_rerio/dna/Danio_rerio.GRCz11.dna.primary_assembly.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/danio_rerio/Danio_rerio.GRCz11.112.gtf.gz,7955,org.Dr.eg.db,,https://figshare.com/ndownloader/files/48354388,https://figshare.com/ndownloader/files/48354367 +FLY,Drosophila melanogaster,,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/drosophila_melanogaster/dna/Drosophila_melanogaster.BDGP6.46.dna.toplevel.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP6.46.112.gtf.gz,7227,org.Dm.eg.db,,https://figshare.com/ndownloader/files/48354382,https://figshare.com/ndownloader/files/48354376 +ERCC,,,,ThermoFisher,https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip,https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip,,,,, +ECOLI,Escherichia coli,str. K-12 substr. MG1655,59,ensembl_bacteria,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/fasta/bacteria_0_collection/escherichia_coli_str_k_12_substr_mg1655_gca_000005845/dna/Escherichia_coli_str_k_12_substr_mg1655_gca_000005845.ASM584v2.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/gtf/bacteria_0_collection/escherichia_coli_str_k_12_substr_mg1655_gca_000005845/Escherichia_coli_str_k_12_substr_mg1655_gca_000005845.ASM584v2.59.gtf.gz,511145,,org.EcolistrK12substrMG1655.eg.db,https://figshare.com/ndownloader/files/48354379,https://figshare.com/ndownloader/files/48354394 +HUMAN,Homo sapiens,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/homo_sapiens/Homo_sapiens.GRCh38.112.gtf.gz,9606,org.Hs.eg.db,,https://figshare.com/ndownloader/files/48354445,https://figshare.com/ndownloader/files/48354448 +,Lactobacillus 
acidophilus,NCFM,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/985/GCF_000011985.1_ASM1198v1/GCF_000011985.1_ASM1198v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/985/GCF_000011985.1_ASM1198v1/GCF_000011985.1_ASM1198v1_genomic.gtf.gz,272621,,,https://figshare.com/ndownloader/files/49061254,https://figshare.com/ndownloader/files/49061257 +MOUSE,Mus musculus,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.primary_assembly.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/mus_musculus/Mus_musculus.GRCm39.112.gtf.gz,10090,org.Mm.eg.db,,https://figshare.com/ndownloader/files/48354460,https://figshare.com/ndownloader/files/48354457 +,Mycobacterium marinum,M,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/018/345/GCF_000018345.1_ASM1834v1/GCF_000018345.1_ASM1834v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/018/345/GCF_000018345.1_ASM1834v1/GCF_000018345.1_ASM1834v1_genomic.gtf.gz,216594,,,https://figshare.com/ndownloader/files/49061260,https://figshare.com/ndownloader/files/49061263 +ORYSJ,Oryza sativa,Japonica,59,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/fasta/oryza_sativa/dna/Oryza_sativa.IRGSP-1.0.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/gtf/oryza_sativa/Oryza_sativa.IRGSP-1.0.59.gtf.gz,39947,,,https://figshare.com/ndownloader/files/48354451,https://figshare.com/ndownloader/files/48354454 +ORYLA,Oryzias latipes,,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/oryzias_latipes/dna/Oryzias_latipes.ASM223467v1.dna.toplevel.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/oryzias_latipes/Oryzias_latipes.ASM223467v1.112.gtf.gz,8090,,org.Olatipes.eg.db,https://figshare.com/ndownloader/files/48354463,https://figshare.com/ndownloader/files/48354466 +,Pseudomonas
aeruginosa,UCBPP-PA14,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/014/625/GCF_000014625.1_ASM1462v1/GCF_000014625.1_ASM1462v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/014/625/GCF_000014625.1_ASM1462v1/GCF_000014625.1_ASM1462v1_genomic.gtf.gz,208963,,,https://figshare.com/ndownloader/files/49061266,https://figshare.com/ndownloader/files/49061269 +RAT,Rattus norvegicus,,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/rattus_norvegicus/dna/Rattus_norvegicus.mRatBN7.2.dna.toplevel.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/rattus_norvegicus/Rattus_norvegicus.mRatBN7.2.112.gtf.gz,10116,org.Rn.eg.db,,https://figshare.com/ndownloader/files/48354472,https://figshare.com/ndownloader/files/48354475 +YEAST,Saccharomyces cerevisiae,S288C,112,ensembl,http://ftp.ensembl.org/pub/release-112/fasta/saccharomyces_cerevisiae/dna/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa.gz,http://ftp.ensembl.org/pub/release-112/gtf/saccharomyces_cerevisiae/Saccharomyces_cerevisiae.R64-1-1.112.gtf.gz,559292,org.Sc.sgd.db,,https://figshare.com/ndownloader/files/48354469,https://figshare.com/ndownloader/files/48354478 +SALTY,Salmonella enterica,serovar Typhimurium str. 
LT2,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/006/945/GCF_000006945.2_ASM694v2/GCF_000006945.2_ASM694v2_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/006/945/GCF_000006945.2_ASM694v2/GCF_000006945.2_ASM694v2_genomic.gtf.gz,99287,,org.SentericaserovarTyphimuriumstrLT2.eg.db,https://figshare.com/ndownloader/files/49061272,https://figshare.com/ndownloader/files/49061275 +,Serratia liquefaciens,ATCC 27592,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/422/085/GCF_000422085.1_ASM42208v1/GCF_000422085.1_ASM42208v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/422/085/GCF_000422085.1_ASM42208v1/GCF_000422085.1_ASM42208v1_genomic.gtf.gz,1346614,,,https://figshare.com/ndownloader/files/49061278,https://figshare.com/ndownloader/files/49061281 +,Staphylococcus aureus,MRSA252,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/505/GCF_000011505.1_ASM1150v1/GCF_000011505.1_ASM1150v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/505/GCF_000011505.1_ASM1150v1/GCF_000011505.1_ASM1150v1_genomic.gtf.gz,282458,,,https://figshare.com/ndownloader/files/49061284,https://figshare.com/ndownloader/files/49061287 +,Streptococcus mutans,UA159,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/007/465/GCF_000007465.2_ASM746v2/GCF_000007465.2_ASM746v2_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/007/465/GCF_000007465.2_ASM746v2/GCF_000007465.2_ASM746v2_genomic.gtf.gz,210007,,,https://figshare.com/ndownloader/files/49061290,https://figshare.com/ndownloader/files/49061293 +,Vibrio fischeri,ES114,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/805/GCF_000011805.1_ASM1180v1/GCF_000011805.1_ASM1180v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/805/GCF_000011805.1_ASM1180v1/GCF_000011805.1_ASM1180v1_genomic.gtf.gz,312309,,,https://figshare.com/ndownloader/files/49061296,https://figshare.com/ndownloader/files/49061299 \ No newline at end of file diff --git 
a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R index 1254920a..058aea71 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R +++ b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R @@ -94,7 +94,7 @@ target_info <- ref_table %>% # Extract the relevant columns from the reference table target_taxid <- target_info$taxon # Taxonomic identifier -target_org_db <- target_info$annotations # org.eg.db R package +target_org_db <- target_info$bioconductor_annotations # org.eg.db R package gtf_link <- target_info$gtf # Path to reference assembly GTF target_short_name <- target_info$name # PANTHER / UNIPROT short name; blank if not available ref_source <- target_info$ref_source # Reference files source diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R index fb8fe1a2..72421811 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R +++ b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R @@ -52,7 +52,7 @@ install_annotations <- function(target_organism, refTablePath = NULL) { # Get package name or build it if not provided target_org_db <- ref_table %>% filter(species == target_organism) %>% - pull(annotations) + pull(bioconductor_annotations) if (is.na(target_org_db) || target_org_db == "") { cat("\nNo annotation database specified. 
Constructing package name...\n") From b39c63c147cb5ef30f7d255d2a4f65c8a4cdd8d4 Mon Sep 17 00:00:00 2001 From: torres-alexis Date: Wed, 30 Oct 2024 21:05:53 -0700 Subject: [PATCH 11/12] remove --no-home from readme --- .../Workflow_Documentation/GL_RefAnnotTable-A/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md index fdbe9f11..92ff4dbd 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md +++ b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md @@ -106,7 +106,7 @@ While in the directory containing the `GL_RefAnnotTable-A_1.1.0` folder that was ```bash -singularity exec --no-home -B $(pwd)/GL_RefAnnotTable-A_1.1.0:$(pwd)/GL_RefAnnotTable-A_1.1.0 \ +singularity exec -B $(pwd)/GL_RefAnnotTable-A_1.1.0:$(pwd)/GL_RefAnnotTable-A_1.1.0 \ $SINGULARITY_CACHEDIR/quay.io-nasa_genelab-gl-refannottable-a-1.1.0.img \ Rscript GL_RefAnnotTable-A_1.1.0/GL-DPPD-7110-A_build-genome-annots-tab.R 'Mus musculus' ``` @@ -207,7 +207,7 @@ If the reference table does not specify an annotations database for the target o #### Using Singularity ```bash -singularity exec --no-home -B $(pwd)/GL_RefAnnotTable-A_1.1.0:$(pwd)/GL_RefAnnotTable-A_1.1.0 \ +singularity exec -B $(pwd)/GL_RefAnnotTable-A_1.1.0:$(pwd)/GL_RefAnnotTable-A_1.1.0 \ $SINGULARITY_CACHEDIR/quay.io-nasa_genelab-gl-refannottable-a-1.1.0.img \ Rscript GL_RefAnnotTable-A_1.1.0/install-org-db.R 'Bacillus subtilis' ``` From dcdf589d00ed4e417c88a2392684d69590cb34c0 Mon Sep 17 00:00:00 2001 From: Alexis Torres Date: Thu, 31 Oct 2024 14:32:42 -0700 Subject: [PATCH 12/12] Add r_libs to scrips, readme, standardize notes --- .../GL_RefAnnotTable-A/README.md | 40 ++++++++++++------- .../GL-DPPD-7110-A_build-genome-annots-tab.R | 2 +- .../workflow_code/install-org-db.R | 1 + 3 
files changed, 27 insertions(+), 16 deletions(-) diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md index 92ff4dbd..aefe1a3b 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md +++ b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md @@ -70,13 +70,13 @@ This approach allows you to run the workflow within a containerized environment, Singularity is a containerization platform for running applications portably and reproducibly. We use container images hosted on Quay.io to encapsulate all the necessary software and dependencies required by the GL_RefAnnotTable-A workflow. This setup allows you to run the workflow without installing any software directly on your system. -> ***Note**: Other containerization tools like Docker or Apptainer can also be used to pull and run these images.* +> **Note**: Other containerization tools like Docker or Apptainer can also be used to pull and run these images. We recommend installing Singularity system-wide as per the official [Singularity installation documentation](https://docs.sylabs.io/guides/3.10/admin-guide/admin_quickstart.html). -> ***Note**: While Singularity is also available through [Anaconda](https://anaconda.org/conda-forge/singularity), we recommend installing Singularity system-wide following the official installation documentation.* +> **Note**: While Singularity is also available through [Anaconda](https://anaconda.org/conda-forge/singularity), we recommend installing Singularity system-wide following the official installation documentation.
@@ -84,17 +84,19 @@ We recommend installing Singularity system-wide as per the official [Singularity To pull the Singularity image needed for the workflow, you can use the provided script as directed below or pull the image directly. -> ***Note**: This command should be run in the location containing the `GL_RefAnnotTable-A_1.1.0` directory that was downloaded in [step 1](#1-download-the-workflow-files). Depending on your network speed, fetching the images will take approximately 20 minutes.* - +> **Note**: This command should be run in the location containing the `GL_RefAnnotTable-A_1.1.0` directory that was downloaded in [step 1](#1-download-the-workflow-files). Depending on your network speed, fetching the images will take approximately 20 minutes. ```bash bash GL_RefAnnotTable-A_1.1.0/bin/prepull_singularity.sh GL_RefAnnotTable-A_1.1.0/config/software/by_docker_image.config ``` -Once complete, a `singularity` folder containing the Singularity images will be created. Run the following command to export this folder as an environment variable: - +Once complete, a `singularity` folder containing the Singularity images will be created. Next, set up the required environment variables: ```bash +# Set R library path to an R_libs folder in the current working directory +export R_LIBS_USER=$(pwd)/R_libs + +# Set Singularity cache directory export SINGULARITY_CACHEDIR=$(pwd)/singularity ``` @@ -102,13 +104,15 @@ export SINGULARITY_CACHEDIR=$(pwd)/singularity #### Step 3: Run the Workflow -While in the directory containing the `GL_RefAnnotTable-A_1.1.0` folder that was downloaded in [step 1](#1-download-the-workflow-files), you can now run the workflow. Below is an example for generating the annotation table for *Mus musculus* (mouse): - +> **Note**: The annotation database creation process requires FTP access through port 21.
If you encounter connection issues, please verify that port 21 is not blocked by your network/firewall settings or try running the workflow on a system with unrestricted FTP access. + +While in the directory containing the `GL_RefAnnotTable-A_1.1.0` folder that was downloaded in [step 1](#1-download-the-workflow-files), you can now run the workflow. Below is an example for generating the annotation table for *Mus musculus* (mouse): ```bash -singularity exec -B $(pwd)/GL_RefAnnotTable-A_1.1.0:$(pwd)/GL_RefAnnotTable-A_1.1.0 \ -$SINGULARITY_CACHEDIR/quay.io-nasa_genelab-gl-refannottable-a-1.1.0.img \ -Rscript GL_RefAnnotTable-A_1.1.0/GL-DPPD-7110-A_build-genome-annots-tab.R 'Mus musculus' +singularity exec \ + --bind $(pwd):$(pwd) \ + $SINGULARITY_CACHEDIR/quay.io-nasa_genelab-gl-refannottable-a-1.1.0.img \ + Rscript GL_RefAnnotTable-A_1.1.0/GL-DPPD-7110-A_build-genome-annots-tab.R 'Mus musculus' ```
@@ -206,12 +210,18 @@ If the reference table does not specify an annotations database for the target o #### Using Singularity +> **Note**: The annotation database creation process requires FTP access through port 21. If you encounter connection issues, please verify that port 21 is not blocked by your network/firewall settings. + ```bash -singularity exec -B $(pwd)/GL_RefAnnotTable-A_1.1.0:$(pwd)/GL_RefAnnotTable-A_1.1.0 \ -$SINGULARITY_CACHEDIR/quay.io-nasa_genelab-gl-refannottable-a-1.1.0.img \ -Rscript GL_RefAnnotTable-A_1.1.0/install-org-db.R 'Bacillus subtilis' +# Set R library path if not already set +export R_LIBS_USER=$(pwd)/R_libs + +singularity exec \ + --bind $(pwd):$(pwd) \ + $SINGULARITY_CACHEDIR/quay.io-nasa_genelab-gl-refannottable-a-1.1.0.img \ + Rscript GL_RefAnnotTable-A_1.1.0/install-org-db.R 'Bacillus subtilis' ``` - +
#### Using a Local R Environment diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R index 058aea71..46923217 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R +++ b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/GL-DPPD-7110-A_build-genome-annots-tab.R @@ -3,7 +3,7 @@ # GeneLab script for generating organism-specific gene annotation tables # Example usage: Rscript GL-DPPD-7110-A_build-genome-annots-tab.R 'Mus musculus' options(timeout = 3600) - +.libPaths(Sys.getenv("R_LIBS_USER")) # Define variables associated with current pipeline and annotation table versions GL_DPPD_ID <- "GL-DPPD-7110-A" workflow_version <- "GL_RefAnnotTable-A_1.1.0" diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R index 72421811..00f03548 100644 --- a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R +++ b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/workflow_code/install-org-db.R @@ -1,5 +1,6 @@ # install-org-db.R options(timeout=3600) +.libPaths(Sys.getenv("R_LIBS_USER")) # Load required libraries library(tidyverse) library(AnnotationForge)