From f458135c232b237a770d195a53f1611b05af00e1 Mon Sep 17 00:00:00 2001 From: Li Jianfeng Date: Wed, 17 Jan 2018 18:16:07 +0800 Subject: [PATCH] added some of softwares --- ChangeLog | 6 +++ Makefile | 2 +- R/install.R | 3 +- inst/extdata/config/github/github.toml | 12 ++++++ inst/extdata/config/github/github_meta.toml | 5 +++ inst/extdata/config/nongithub/nongithub.toml | 40 +++++++++++++++++++ .../config/nongithub/nongithub_meta.toml | 27 +++++++++++++ 7 files changed, 93 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index db61ef0..4713b94 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2018-01-10 Li Jianfeng + + * added GIGGLE (genomic search engine) + * added absolute, hapseg, atlas2, beagle, contest + in nongithub + 2018-01-09 Li Jianfeng * added olego, chronqc and rHAT diff --git a/Makefile b/Makefile index 458bfc1..e61d946 100644 --- a/Makefile +++ b/Makefile @@ -59,7 +59,7 @@ show_versions: test: cd .;\ - Rscript -e "devtools::test()" + Rscript -e "devtools::test(reporter = 'summary')" test2: @echo "name:$(name), version:$(version)" diff --git a/R/install.R b/R/install.R index 3f44b30..ea507c9 100644 --- a/R/install.R +++ b/R/install.R @@ -143,7 +143,8 @@ install.bioinfo <- function(name = c(), download.dir = c(), destdir = c(), name. 
info.msg(sprintf("%s downloaded fail!", name), verbose = verbose) return(FALSE) } - if (status == TRUE && (name.saved[count] %in% show.installed(db)) || (status == TRUE && !save.to.db)) { + if (status == TRUE && (name.saved[count] %in% show.installed(db)) || (status == + TRUE && !save.to.db)) { install.success <- c(install.success, name.saved[count]) } else { install.fail <- c(install.fail, name.saved[count]) diff --git a/inst/extdata/config/github/github.toml b/inst/extdata/config/github/github.toml index cbabd8a..49755b8 100644 --- a/inst/extdata/config/github/github.toml +++ b/inst/extdata/config/github/github.toml @@ -611,3 +611,15 @@ make_dir = ["./src"] [rhat.install] linux = "make; mkdir -p {{destdir}}/bin; cp rHAT-aligner rHAT-indexer {{destdir}}/bin" mac = "make; mkdir -p {{destdir}}/bin; cp rHAT-aligner rHAT-indexer {{destdir}}/bin" + +[giggle] +github_url = "https://github.com/ryanlayer/giggle" +[giggle.install] +linux = ["make", "mkdir -p {{destdir}}/bin; cp bin/* {{destdir}}/bin"] +mac = ["make", "mkdir -p {{destdir}}/bin; cp bin/* {{destdir}}/bin"] + +[facets] +github_url = "https://github.com/mskcc/facets" +[facets.install] +linux = "#R#devtools::install('.', build_vignettes = TRUE);devtools::install_github('mskcc/pctGCdata')#R#" +mac = "#R#devtools::install('.', build_vignettes = TRUE);devtools::install_github('mskcc/pctGCdata')#R#" diff --git a/inst/extdata/config/github/github_meta.toml b/inst/extdata/config/github/github_meta.toml index b28a717..854bc20 100644 --- a/inst/extdata/config/github/github_meta.toml +++ b/inst/extdata/config/github/github_meta.toml @@ -53,3 +53,8 @@ title = "rHAT: fast alignment of noisy long reads with regional hashing." description = """MOTIVATION:Single Molecule Real-Time (SMRT) sequencing has been widely applied in cutting-edge genomic studies. 
However, it is still an expensive task to align the noisy long SMRT reads to reference genome by state-of-the-art aligners, which is becoming a bottleneck in applications with SMRT sequencing. Novel approach is on demand for improving the efficiency and effectiveness of SMRT read alignment. RESULTS:We propose Regional Hashing-based Alignment Tool (rHAT), a seed-and-extension-based read alignment approach specifically designed for noisy long reads. rHAT indexes reference genome by regional hash table (RHT), a hash table-based index which describes the short tokens within local windows of reference genome. In the seeding phase, rHAT utilizes RHT for efficiently calculating the occurrences of short token matches between partial read and local genomic windows to find highly possible candidate sites. In the extension phase, a sparse dynamic programming-based heuristic approach is used for reducing the cost of aligning read to the candidate sites. By benchmarking on the real and simulated datasets from various prokaryote and eukaryote genomes, we demonstrated that rHAT can effectively align SMRT reads with outstanding throughput.""" publication = "Liu B, Guan D, Teng M, et al. rHAT: fast alignment of noisy long reads with regional hashing[J]. Bioinformatics, 2015, 32(11): 1625-1631." + +[github.item.giggle] +title = "GIGGLE: a search engine for large-scale integrated genome analysis" +description = "GIGGLE is a genomics search engine that identifies and ranks the significance of genomic loci shared between query features and thousands of genome interval files. GIGGLE (https://github.com/ryanlayer/giggle) scales to billions of intervals and is over three orders of magnitude faster than existing methods. Its speed extends the accessibility and utility of resources such as ENCODE, Roadmap Epigenomics, and GTEx by facilitating data integration and hypothesis generation." +publication = "Layer, R.M. et al. GIGGLE: a search engine for large-scale integrated genome analysis.
Nat Methods (2018)." diff --git a/inst/extdata/config/nongithub/nongithub.toml b/inst/extdata/config/nongithub/nongithub.toml index a3e8040..b2ec608 100644 --- a/inst/extdata/config/nongithub/nongithub.toml +++ b/inst/extdata/config/nongithub/nongithub.toml @@ -165,6 +165,12 @@ bin_dir = ["{{destdir}}/bin"] linux = "mkdir -p {{destdir}}/bin; cp *.jar {{destdir}}/bin" mac = "mkdir -p {{destdir}}/bin; cp *.jar {{destdir}}/bin" +[gatk4] +source_url = "http://bioinfo.rjh.com.cn/download/bioinstaller/gatk/gatk-{{version}}.zip" +version_available = "4.0.0.0" +linux = "mkdir -p {{destdir}}/bin; cp gatk *.jar {{destdir}}/bin" +mac = "mkdir -p {{destdir}}/bin; cp gatk *.jar {{destdir}}/bin" + [mutect] source_url = ["https://github.com/Miachol/gatk_releases/raw/master/mutect{{version}}.zip"] version_newest = "1.1.7" @@ -677,3 +683,37 @@ use_github_versions = true [vcfanno.install] linux = "mkdir -p {{destdir}}/bin; chmod a+x {{download.dir}}/vcfanno_linux64; cp {{download.dir}}/vcfanno_linux64 {{destdir}}/bin" mac = "mkdir -p {{destdir}}/bin; chmod a+x {{download.dir}}/vcfanno_osx; cp {{download.dir}}/vcfanno_osx {{destdir}}/bin" + +[absolute] +source_url = "http://bioinfo.rjh.com.cn/download/bioinstaller/absolute/ABSOLUTE_{{version}}.tar.gz" +version_available = ["1.0.6", "1.0.5"] +[absolute.install] +linux = "#R#devtools::install('./')#R#" +mac = "#R#devtools::install('./')#R#" +windows = "#R#devtools::install('./')#R#" + +[hapseg] +source_url = "http://bioinfo.rjh.com.cn/download/bioinstaller/absolute/HAPSEG_{{version}}.tar.gz" +version_available = ["1.1.1"] +[hapseg.install] +linux = "#R#setRepositories(ind=1:2);devtools::install('./')#R#" +mac = "#R#setRepositories(ind=1:2);devtools::install('./')#R#" +windows = "#R#setRepositories(ind=1:2);devtools::install('./')#R#" + +[atlas2] +source_url = "https://sourceforge.net/projects/atlas2/files/Atlas2_v{{version}}.zip" +version_available = ["1.4.3", "1.4.1"] + +[beagle] +source_url = 
"http://faculty.washington.edu/browning/beagle/{{version}}" +version_available = ["beagle.08Jun17.d8b.jar", "beagle_4.1_21Jan17.pdf", "run.beagle.08Jun17.d8b.example", + "release_notes", "beagle.170608.zip"] + +[contest] +source_url = ["http://bioinfo.rjh.com.cn/download/bioinstaller/contest/contest-{{version}}-bin.zip", + "hg19_population_stratified_af_hapmap_3.3.vcf.gz"] +url_all_download = true +version_available = "1.0.24530" +[contest.install] +linux = "mkdir -p {{destdir}}/bin; cp {{download.dir}}/*.jar {{destdir}}/bin" +mac = "mkdir -p {{destdir}}/bin; cp {{download.dir}}/*.jar {{destdir}}/bin" diff --git a/inst/extdata/config/nongithub/nongithub_meta.toml b/inst/extdata/config/nongithub/nongithub_meta.toml index 349a1a9..b137322 100644 --- a/inst/extdata/config/nongithub/nongithub_meta.toml +++ b/inst/extdata/config/nongithub/nongithub_meta.toml @@ -35,3 +35,30 @@ title = "annotate a VCF with other VCFs/BEDs/tabixed files" description = "vcfanno allows you to quickly annotate your VCF with any number of INFO fields from any number of VCFs or BED files. It uses a simple conf file to allow the user to specify the source annotation files and fields and how they will be added to the info of the query VCF." publication = "Pedersen B S, Layer R M, Quinlan A R. Vcfanno: fast, flexible annotation of genetic variants[J]. Genome Biology, 2016, 17(1):1-9." tag = ["NGS", "annotation"] + +[nongithub.item.absolute] +title = "ABSOLUTE can estimate purity/ploidy, and from that compute absolute copy-number and mutation multiplicities." +description = "When DNA is extracted from an admixed population of cancer and normal cells, the information on absolute copy number per cancer cell is lost in the mixing. The purpose of ABSOLUTE is to re-extract these data from the mixed DNA population. 
This process begins by generation of segmented copy number data, which is input to the ABSOLUTE algorithm together with pre-computed models of recurrent cancer karyotypes and, optionally, allelic fraction values for somatic point mutations. The output of ABSOLUTE then provides re-extracted information on the absolute cellular copy number of local DNA segments and, for point mutations, the number of mutated alleles." +publication = "Carter S L, Cibulskis K, Helman E, et al. Absolute quantification of somatic DNA alterations in human cancer[J]. Nature biotechnology, 2012, 30(5): 413-421." + +[nongithub.item.hapseg] +title = "A probabilistic method to interpret bi-allelic marker data in cancer samples." +description = "The HAPSEG module takes single nucleotide polymorphism (SNP) microarray data and outputs copy number data segmented by haplotype. The output data is suitable for use as input data for the ABSOLUTE module. For more details, see http://software.broadinstitute.org/cancer/software/genepattern/modules/docs/HAPSEG/1" +publication = "Carter SL, Meyerson M, Getz G. Accurate estimation of homologue-specific DNA concentration-ratios in cancer samples allows long-range haplotyping. Available from Nature Precedings; 2011." + +[nongithub.item.atlas2] +title = "Atlas2, next-generation sequencing suite of variant analysis tools specializing in the separation of true SNPs and insertions and deletions (indels)" +description = "Atlas2 is a next-generation sequencing suite of variant analysis tools specializing in the separation of true SNPs and insertions and deletions (indels) from sequencing and mapping errors in Whole Exome Capture Sequencing (WECS) data." +publication = "Challis D, et al. An integrative variant analysis suite for whole exome next-generation sequencing data.
BMC Bioinformatics 2012, 13:8 doi:10.1186/1471-2105-13-8" + +[nongithub.item.beagle] +title = "Beagle, a software package that performs genotype calling, genotype phasing, imputation of ungenotyped markers, and identity-by-descent segment detection." +description = """Beagle version 4.1 has a more accurate genotype phasing algorithm and a very fast and accurate genotype imputation algorithm. Version 4.1 also has several changes to the command line arguments which are described in the release notes. The "ped" argument has no effect in version 4.1. If your data contains nuclear families and you want to model the parent-offspring relationships when phasing genotypes, please use version 4.0.""" +publication = ["S R Browning and B L Browning (2007) Rapid and accurate haplotype phasing and missing data inference for whole genome association studies by use of localized haplotype clustering. Am J Hum Genet 81:1084-1097. doi:10.1086/521987", + "B L Browning and S R Browning (2013). Improving the accuracy and efficiency of identity-by-descent detection in population data. Genetics 194(2):459-71. doi:10.1534/genetics.113.150029", + "B L Browning and S R Browning (2016). Genotype imputation with millions of reference samples. Am J Hum Genet 98:116-126. doi:10.1016/j.ajhg.2015.11.020"] + +[nongithub.item.contest] +title = "ContEst is a tool (and method) for estimating the amount of cross-sample contamination in next generation sequencing data. Using a Bayesian framework, contamination levels are estimated from array based genotypes and sequencing reads." +description = "Here, we present ContEst, a tool for estimating the level of cross-individual contamination in next-generation sequencing data. We demonstrate the accuracy of ContEst across a range of contamination levels, sources and read depths using sequencing data mixed in silico at known concentrations. We applied our tool to published cancer sequencing datasets and report their estimated contamination levels." 
+publication = "Cibulskis K, Mckenna A, Fennell T, et al. ContEst: estimating cross-contamination of human samples in next-generation sequencing data[J]. Bioinformatics, 2011, 27(18):2601-2602."