From dfa846a271b3e3fe084db799d124b104d0c877d6 Mon Sep 17 00:00:00 2001
From: Jim Havrilla
Date: Mon, 23 Jul 2018 11:45:00 -0600
Subject: [PATCH] added PrimateAI, uploaded source file to s3

---
 score-sets/GRCh37/PrimateAI/README.md |  7 +++++++
 score-sets/GRCh37/PrimateAI/make.sh   | 28 ++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 score-sets/GRCh37/PrimateAI/README.md
 create mode 100644 score-sets/GRCh37/PrimateAI/make.sh

diff --git a/score-sets/GRCh37/PrimateAI/README.md b/score-sets/GRCh37/PrimateAI/README.md
new file mode 100644
index 0000000..bed6725
--- /dev/null
+++ b/score-sets/GRCh37/PrimateAI/README.md
@@ -0,0 +1,7 @@
+Paper at: www.nature.com/articles/s41588-018-0167-z
+
+Using hundreds of thousands of common variants from population sequencing of six non-human primate species, PrimateAI is a trained deep neural network to classify variants' pathogenicity. It predicts impact purely from sequence and predicts secondary structure information and solvent accessibility information.
+
+From the paper:
+
+"Prediction scores for all ~70 million human missense variants on the hg19/GRCh37 genome build with the human+primate deep learning network (PrimateAI) are publicly hosted (https://basespace.illumina.com/s/cPgCSmecvhb4). For practical application of PrimateAI scores, we recommend a threshold of >0.8 for likely pathogenic classification, <0.6 for likely benign, and 0.6-0.8 as intermediate in genes with dominant modes of inheritance, on the basis of the enrichment of de novo variants in cases as compared to controls (Fig. 3d), and a threshold of >0.7 for likely pathogenic and <0.5 for likely benign in genes with recessive modes of inheritance."
diff --git a/score-sets/GRCh37/PrimateAI/make.sh b/score-sets/GRCh37/PrimateAI/make.sh
new file mode 100644
index 0000000..8e7384c
--- /dev/null
+++ b/score-sets/GRCh37/PrimateAI/make.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+# Build the sorted, bgzip-compressed, tabix-indexed PrimateAI score table.
+# use 11th column
+set -euo pipefail
+
+url=https://s3.us-east-2.amazonaws.com/pathoscore-data/primateai/PrimateAI_scores_v0.2.tsv.gz
+raw=PrimateAI_scores_v0.2.raw.tsv.gz
+out=PrimateAI_scores_v0.2.tsv
+
+# The download is kept under its own name because bgzip below writes
+# ${out}.gz; filtering an already processed file again would strip its
+# header and drop records on every rerun.
+if [ ! -s "$raw" ]; then
+  # Fetch to a temporary name so an interrupted download is never reused.
+  wget -O "$raw.part" "$url"
+  mv -- "$raw.part" "$raw"
+fi
+
+# Drop '#' lines and the first line after them, re-emit the next line as a
+# '#'-prefixed header, then sort only the records by column 1 and numerically
+# by column 2 (C locale for a reproducible order).
+zgrep -v "^#" "$raw" | sed '1d' | {
+  IFS= read -r header
+  printf '#%s\n' "$header"
+  LC_ALL=C sort -k1,1 -k2,2n
+} > "$out"
+bgzip -f "$out"
+tabix -b 2 -e 2 "$out.gz"