Skip to content

Commit

Permalink
add softwares and databases
Browse files Browse the repository at this point in the history
  • Loading branch information
Miachol committed Jan 19, 2018
1 parent f458135 commit f6f8623
Show file tree
Hide file tree
Showing 10 changed files with 156 additions and 6 deletions.
12 changes: 11 additions & 1 deletion ChangeLog
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
2018-01-19 Li Jianfeng <[email protected]>

* added db_annovar_docm, db_annovar_intogen,
db_annovar_disgenet, db_annovar_cancer_hotspots in db
* rename db_cancer_hotspot to db_cancer_hotspots
* added RSEM (config key `resm`), radia in github
* added rMATS, PRADA, IGV, Marina, PARADIGM, Meerkat,
VaDiR in nongithub
* source parse_version.R in local env

2018-01-10 Li Jianfeng <[email protected]>

* added GIGGLE (genomic search engine)
* added facets, GIGGLE (genomic search engine) in github
* added absolute, hapseg, atlas2, beagle, contest
in nongithub

Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: BioInstaller
Title: Lightweight Biology Software Installer
Version: 0.3.2.2
Version: 0.3.2.3
Authors@R: person("Jianfeng", "Li", email = "[email protected]", role = c("aut", "cre"))
Description:
Can be used to download and install a large collection of bioinformatics analysis software and databases, such as NGS read-mapping tools together with their required databases.
Expand Down
2 changes: 1 addition & 1 deletion R/versions.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ use.github.response <- function(config) {

nongithub2versions <- function(name) {
script <- system.file("extdata", "scripts/parse_version.R", package = "BioInstaller")
source(script)
source(script, local = TRUE)
text <- sprintf("get.%s.versions()", name)
tryCatch(eval(parse(text = text)), error = function(e) {
NULL
Expand Down
25 changes: 25 additions & 0 deletions inst/extdata/config/db/db_annovar.toml
Original file line number Diff line number Diff line change
Expand Up @@ -675,3 +675,28 @@ source_url = "http://bioinfo.rjh.com.cn/download/annovarR/humandb/tall_somatic_g
version_available = "20171206"
buildver_available = ["hg19", "hg38"]
description = "Collected from recently published papers, 1) Recurrent SPI1 (PU.1) fusions in high-risk pediatric T cell acute lymphoblastic leukemia 2) The genomic landscape of pediatric and young adult T-lineage acute lymphoblastic leukemia 3) Identification of fusion genes and characterization of transcriptome features in T-cell acute lymphoblastic leukemia"

# DoCM (Database of Curated Mutations) table hosted under annovarR/humandb.
# `{{buildver}}` and `{{version}}` are placeholders for the consuming
# installer, not TOML syntax; presumably they are filled from the *_available
# keys below, giving .../humandb/hg19_docm_3.2.txt.gz — TODO confirm.
[db_annovar_docm]
source_url = "http://bioinfo.rjh.com.cn/download/annovarR/humandb/{{buildver}}_{{version}}.txt.gz"
version_available = "docm_3.2"
buildver_available = ["hg19"]
description = "DoCM, the Database of Curated Mutations, is a highly curated database of known, disease-causing mutations that provides easily explorable variant lists with direct links to source citations for easy verification. http://docm.genome.wustl.edu/about"

# IntOGen mutations catalog, distributed as a gzipped SQL dump.
# NOTE(review): source_url ends in `.sqlite.sql.gz` but the install step reads
# `.sqlite.sql`; this assumes the download is decompressed before `install`
# runs — confirm against the installer.
[db_annovar_intogen]
source_url = "http://bioinfo.rjh.com.cn/download/annovarR/humandb/{{buildver}}_{{version}}.sqlite.sql.gz"
version_available = "intogen_20180119"
buildver_available = ["hg19"]
description = "Merged intogen table intogen_mutations_catalog [Driver or passenger mutations]. https://www.intogen.org/downloads"
# The `#R#...#R#` markers appear to delimit an R expression to evaluate; here
# it calls sql2sqlite() to turn the SQL dump into a .sqlite file.
install = "#R#sql2sqlite('{{buildver}}_{{version}}.sqlite.sql', sqlite.path = '{{buildver}}_{{version}}.sqlite')#R#"

# DisGeNET downloads: one file per entry in version_available.
# NOTE(review): source_url is keyed on {{version}} only (no {{buildver}}),
# although two builds are listed — presumably the same file serves both;
# confirm.
[db_annovar_disgenet]
source_url = "http://bioinfo.rjh.com.cn/download/annovarR/humandb/{{version}}.txt.gz"
version_available = [
    "disgenet_befree_gene_disease",
    "disgenet_befree_rs_disease",
    "disgenet_curated_gene_disease",
    "disgenet_curated_variant_disease",
    "disgenet_gene_disease",
    "disgenet_pubannotator_variant_disease",
    "disgenet_rs_disease",
]
buildver_available = ["hg19", "hg38"]
description = "See http://www.disgenet.org/web/DisGeNET/menu/downloads"

# Cancer Hotspots V2 results (sheet 1, per the description below).
# source_url is keyed on {{version}} only; both listed builds would therefore
# resolve to the same file — presumably intentional, TODO confirm.
[db_annovar_cancer_hotspots]
source_url = "http://bioinfo.rjh.com.cn/download/annovarR/humandb/{{version}}.txt.gz"
version_available = ["cancer_hotspots_v2"]
buildver_available = ["hg19", "hg38"]
# The `#` in the URL below sits inside a quoted string, so it does not start a comment.
description = "See http://cancerhotspots.org/#/home Hotspot Results V2 sheet1"
4 changes: 2 additions & 2 deletions inst/extdata/config/db/db_main.toml
Original file line number Diff line number Diff line change
Expand Up @@ -320,8 +320,8 @@ version_available = ["RNAedit", "dbSNP"]
source_url = "http://bioinfo.rjh.com.cn/download/bioinstaller/docm/docm_{{version}}.txt.gz"
version_available = ["3.2", "3_clinvar_export"]

[db_cancer_hotspot]
source_url = "http://bioinfo.rjh.com.cn/download/bioinstaller/cancer_hotspot/cancer_hotspot_{{version}}.txt.gz"
[db_cancer_hotspots]
source_url = "http://bioinfo.rjh.com.cn/download/bioinstaller/cancer_hotspots/cancer_hotspots_{{version}}.txt.gz"
version_available = ["v1_sheet1", "v1_sheet2", "v2_sheet1", "v2_sheet2"]

[db_intogen]
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/config/db/db_meta.toml
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ description = "DoCM, the Database of Curated Mutations, is a highly curated data
publication = "A correspondence describing DoCM has been published in Nature Methods: DoCM: a database of curated mutations in cancer. Nature Methods (2016) doi:10.1038/nmeth.4000."
tag = ["NGS", "database"]

[db.item.cancer_hotspot]
[db.item.cancer_hotspots]
title = "A RESOURCE FOR STATISTICALLY SIGNIFICANT MUTATIONS IN CANCER"
description = "This resource is maintained by the Kravis Center for Molecular Oncology at Memorial Sloan Kettering Cancer Center. It provides information about statistically significantly recurrent mutations identified in large scale cancer genomics data."
publication = ["Chang et al., Accelerating discovery of functional mutant alleles in cancer. Cancer Discovery, 10.1158/2159-8290.CD-17-0321 (2017)",
Expand Down
10 changes: 10 additions & 0 deletions inst/extdata/config/github/github.toml
Original file line number Diff line number Diff line change
Expand Up @@ -623,3 +623,13 @@ github_url = "https://github.com/mskcc/facets"
[facets.install]
linux = "#R#devtools::install('.', build_vignettes = TRUE);devtools::install_github('mskcc/pctGCdata')#R#"
mac = "#R#devtools::install('.', build_vignettes = TRUE);devtools::install_github('mskcc/pctGCdata')#R#"

# RSEM, built from the deweylab/RSEM GitHub repository.
# NOTE(review): the table key is spelled `resm`, while the project in
# github_url is RSEM (letters transposed). Renaming the key would change the
# name this entry is looked up by, so it is left as-is here — confirm whether
# an `rsem` key is wanted.
[resm]
github_url = "https://github.com/deweylab/RSEM"
# Per-platform build commands. All three run `make`, `make ebseq`, then
# `make install` into {{destdir}} with an empty prefix; the Windows variant
# additionally passes cygwin=true to the first `make`.
[resm.install]
linux = "make;make ebseq; make install DESTDIR={{destdir}} prefix=''"
mac = "make;make ebseq; make install DESTDIR={{destdir}} prefix=''"
windows = "make cygwin=true; make ebseq; make install DESTDIR={{destdir}} prefix=''"

# RADIA, fetched from its GitHub repository. No [radia.install] table is
# defined in this section, so presumably the entry is download-only — confirm.
# The URL is written without a trailing slash, matching the other github_url
# entries in this file (e.g. facets, resm), so that any code deriving the
# owner/repo name from the URL sees the same shape for every entry.
[radia]
github_url = "https://github.com/aradenbaugh/radia"
12 changes: 12 additions & 0 deletions inst/extdata/config/github/github_meta.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,15 @@ publication = "Liu B, Guan D, Teng M, et al. rHAT: fast alignment of noisy long
title = "GIGGLE: a search engine for large-scale integrated genome analysis"
description = "GIGGLE is a genomics search engine that identifies and ranks the significance of genomic loci shared between query features and thousands of genome interval files. GIGGLE (https:// github.com/ryanlayer/giggle) scales to billions of intervals and is over three orders of magnitude faster than existing methods. Its speed extends the accessibility and utility of resources such as ENCODE , Roadmap Epigenomics, and GTE x by facilitating data integration and hypothesis generation."
publication = "Layer, R.M. et al. GIGGLE: a search engine for large-scale integrated genome analysis. Nat Methods (2018)."

# Metadata for the `[resm]` entry of github.toml (RSEM). The key keeps the
# `resm` spelling used there so the two tables stay paired.
[github.item.resm]
title = "RSEM: accurate quantification of gene and isoform expression from RNA-Seq data"
description = "RSEM is a software package for estimating gene and isoform expression levels from RNA-Seq data. The RSEM package provides an user-friendly interface, supports threads for parallel computation of the EM algorithm, single-end and paired-end read data, quality scores, variable-length reads and RSPD estimation. In addition, it provides posterior mean and 95% credibility interval estimates for expression levels. For visualization, It can generate BAM and Wiggle files in both transcript-coordinate and genomic-coordinate. Genomic-coordinate files can be visualized by both UCSC Genome browser and Broad Institute's Integrative Genomics Viewer (IGV). Transcript-coordinate files can be visualized by IGV. RSEM also has its own scripts to generate transcript read depth plots in pdf format. The unique feature of RSEM is, the read depth plots can be stacked, with read depth contributed to unique reads shown in black and contributed to multi-reads shown in red. In addition, models learned from data can also be visualized. Last but not least, RSEM contains a simulator."
publication = "Li B, Dewey C N. RSEM: accurate transcript quantification from RNA-Seq data with or without a reference genome[J]. BMC bioinformatics, 2011, 12(1): 323."

# Metadata for the `[radia]` entry of github.toml.
[github.item.radia]
title = "RADIA: RNA and DNA Integrated Analysis for Somatic Mutation Detection"
# Multi-line basic string: the line break between the two paragraphs is part
# of the stored value.
description = """RADIA identifies RNA and DNA variants in BAM files. RADIA is typically run on 3 BAM files consisting of the Normal DNA, Tumor DNA and Tumor RNA. If no RNA is available from the tumor, then it is run on the normal/tumor pairs. For the normal DNA, RADIA outputs any differences compared to the reference which could be potential Germline mutations. For the tumor DNA, RADIA outputs any differences compared to the reference and the normal DNA which could be potential Somatic mutations. RADIA combines the tumor DNA and tumor RNA to augment the somatic mutation calls. It also uses the tumor RNA to identify potential RNA editing events.
The DNA Only Method (DOM) uses just the tumor/normal pairs of DNA (ignoring the RNA), while the Triple BAM Method (TBM) uses all three datasets from the same patient to detect somatic mutations. The mutations from the TBM are further categorized into 2 sub-groups: RNA Confirmation and RNA Rescue calls. RNA Confirmation calls are those that are made by both the DOM and the TBM due to the strong read support in both the DNA and RNA. RNA Rescue calls are those that had very little DNA support, hence not called by the DOM, but strong RNA support, and thus called by the TBM. RNA Rescue calls are typically missed by traditional methods that only interrogate the DNA."""
publication = "Radenbaugh AJ, Ma S, Ewing A, Stuart JM, Collisson EA, Zhu J, Haussler D. (2014) RADIA: RNA and DNA Integrated Analysis for Somatic Mutation Detection. PLoS ONE 9(11): e111516. doi:10.1371/journal.pone.0111516"
33 changes: 33 additions & 0 deletions inst/extdata/config/nongithub/nongithub.toml
Original file line number Diff line number Diff line change
Expand Up @@ -717,3 +717,36 @@ version_available = "1.0.24530"
[contest.install]
linux = "mkdir -p {{destdir}}/bin; cp {{download.dir}}/*.jar {{destdir}}/bin"
mac = "mkdir -p {{destdir}}/bin; cp {{download.dir}}/*.jar {{destdir}}/bin"

# rMATS release tarballs from SourceForge; {{version}} selects the file
# rMATS.<version>.tgz. Two versions are offered.
[rmats]
source_url = "https://sourceforge.net/projects/rnaseq-mats/files/MATS/rMATS.{{version}}.tgz"
version_available = ["4.0.1", "3.2.5"]

# Index archive to accompany rMATS. {{version}} appears twice in the URL, so
# with "STAR" it would resolve to .../STAR/STARindex.tgz (assuming plain
# textual substitution — TODO confirm).
[rmats_reffa]
source_url = "http://rmaps.cecsresearch.org/{{version}}/{{version}}index.tgz"
version_available = "STAR"

# pyPRADA release tarball from the PRADA SourceForge project.
[prada]
source_url = "https://sourceforge.net/projects/prada/files/pyPRADA/pyPRADA_{{version}}.tar.gz"
version_available = "1.2"

# IGV zip distribution from the Broad Institute download server.
# NOTE(review): the directory segment `2.4` is hard-coded while the file name
# uses {{version}}; adding a version outside 2.4.x would need the path
# updated as well.
[igv]
source_url = "http://data.broadinstitute.org/igv/projects/downloads/2.4/IGV_{{version}}.zip"
version_available = "2.4.6"

# marina_matlab tarball hosted on bioinfo.rjh.com.cn.
[marina]
source_url = "http://bioinfo.rjh.com.cn/download/bioinstaller/marina_matlab/marina_matlab-{{version}}.tar.gz"
version_available = "4"

# PARADIGM: source_url points at a single Python file and contains no
# {{version}} placeholder, so the "latest" label below does not affect the URL.
[paradigm]
source_url = "http://paradigm.five3genomics.com/five3_paradigm_webapi.py"
version_available = "latest"

# Meerkat tarball hosted on bioinfo.rjh.com.cn; {{version}} selects
# meerkat.<version>.tar.gz.
[meerkat]
source_url = "http://bioinfo.rjh.com.cn/download/bioinstaller/meerkat/meerkat.{{version}}.tar.gz"
version_available = "0.189"

# VaDiR archive. Two URLs are listed for the same file name (an HTTP host and
# an FTP host); presumably the installer treats them as alternative mirrors —
# TODO confirm. Neither URL contains a {{version}} placeholder.
[vadir]
source_url = [
    "http://bioinfo.rjh.com.cn/download/bioinstaller/vadir/VaDiR.tar.gz",
    "ftp://penguin.genomics.cn/pub/10.5524/100001_101000/100360/VaDiR.tar.gz",
]
version_available = "latest"
Loading

0 comments on commit f6f8623

Please sign in to comment.